From 0764a030e673b3bcf51a9e09c4978434e4be2cbc Mon Sep 17 00:00:00 2001
From: "JASON-6700K\\jandersen" <JGAndersen@gmail.com>
Date: Sun, 19 Jul 2020 14:19:42 -0400
Subject: [PATCH] GSLA:  encoder, work in progress

---
 source/gsla_file.cpp | 129 ++++++++++++++++++++++++--
 source/gsla_file.h   |   6 +-
 source/lzb.cpp       | 214 +++++++++++++++++++++++++++++++++++++------
 3 files changed, 312 insertions(+), 37 deletions(-)

diff --git a/source/gsla_file.cpp b/source/gsla_file.cpp
index 3d9b8ce..ff441e3 100644
--- a/source/gsla_file.cpp
+++ b/source/gsla_file.cpp
@@ -242,7 +242,7 @@ void GSLAFile::LoadFromFile(const char* pFilePath)
 			if (pINIT->IsValid())
 			{
 				// We have an initial frame chunk
-				UnpackInitialFrame(pINIT);
+				UnpackInitialFrame(pINIT, pHeader);
 			}
 			else if (pANIM->IsValid())
 			{
@@ -263,13 +263,13 @@ void GSLAFile::LoadFromFile(const char* pFilePath)
 // Unpack the initial frame, that's been packed with an empty initial dictionary
 // So every byte of the buffer will be written out (no skip opcodes)
 //
-void GSLAFile::UnpackInitialFrame(GSLA_INIT* pINIT)
+void GSLAFile::UnpackInitialFrame(GSLA_INIT* pINIT, GSLA_Header* pHeader)
 {
 	unsigned char *pData = ((unsigned char*)pINIT) + sizeof(GSLA_INIT);
 
 	unsigned char *pTargetBuffer = m_pC1PixelMaps[ 0 ]; // Data needs to be pre allocated
 
-	//DecompressFrame(pTargetBuffer, pData);
+	DecompressFrame(pTargetBuffer, pData, (unsigned char*)pHeader);
 }
 
 //------------------------------------------------------------------------------
@@ -359,9 +359,8 @@ void GSLAFile::SaveToFile(const char* pFilenamePath)
 	unsigned char* pInitialFrame = m_pC1PixelMaps[ 0 ];
 
 	// We're not worried about bank wrap on the first frame, and we don't have a pre-populated
-	// dictionary
-	int compressedSize = LZBA_Compress(pWorkBuffer, pInitialFrame, m_frameSize,
-									   pInitialFrame, pInitialFrame);
+	// dictionary - Also use the best compression we can get here
+	int compressedSize = Old_LZB_Compress(pWorkBuffer, pInitialFrame, m_frameSize);
 
 	for (int compressedIndex = 0; compressedIndex < compressedSize; ++compressedIndex)
 	{
@@ -458,4 +457,122 @@ void GSLAFile::SaveToFile(const char* pFilenamePath)
 }
 
 //------------------------------------------------------------------------------
+//
+// Std C memcpy is undefined for overlapping buffers, and seemed to be stopping
+// the copy when I overlap the buffers to get a pattern run copy, so copy by hand
+//
+static void my_memcpy(unsigned char* pDest, unsigned char* pSrc, int length)
+{
+	while (length-- > 0)	// nothing is copied when length <= 0
+	{
+		*pDest++ = *pSrc++;	// strictly forward, so a byte just written can be read back as source
+	}
+}
+
+//------------------------------------------------------------------------------
+//
+//  Decode one frame: pTarget is the Target Frame Buffer (and the dictionary),
+//  pData is the source data for a Frame (a stream of 16-bit LE opcodes)
+//
+//  pDataBaseAddress is the base address where the animation file was loaded;
+//  this is used so we can properly interpret bank-skip opcodes (data is broken
+//  into 64K chunks for the IIgs/65816)
+//
+//  returns the number of bytes that have been processed in the pData
+//
+int GSLAFile::DecompressFrame(unsigned char* pTarget, unsigned char* pData, unsigned char* pDataBaseAddress)
+{
+	unsigned char *pDataStart = pData;	// kept so the consumed byte count can be returned
+
+	int cursorPosition = 0;	// write offset into pTarget, restarts at 0 on every call
+	unsigned short opcode;
+
+	bool bDoWork = true;
+
+	while (bDoWork)
+	{
+		opcode  = pData[0];	// low byte first
+		opcode |= (((unsigned short)pData[1])<<8);
+
+		if (opcode & 0x8000)	// bit 15 set: cursor skip or dictionary copy
+		{
+			if (opcode & 0x0001)
+			{
+				// Cursor Skip Forward, by (bits 1-14)+1 bytes, target is not written
+				opcode = (opcode>>1) & 0x3FFF;
+				cursorPosition += (opcode+1);
+				pData+=2;
+			}
+			else
+			{
+				// Dictionary Copy, source is the target buffer itself (may overlap the cursor)
+				unsigned short dictionaryPosition = pData[2];
+				dictionaryPosition |= (((unsigned short)pData[3])<<8);
+
+				dictionaryPosition -= 0x2000;	// it's like this to help the
+											    // GS decode it quicker
+
+				unsigned short length = ((opcode>>1) & 0x3FFF)+1;
+
+				my_memcpy(pTarget + cursorPosition, pTarget + dictionaryPosition, (int) length );
+
+				pData += 4;
+				cursorPosition += length;
+			}
+		}
+		else
+		{
+			if (opcode & 0x0001)
+			{
+				// Literal Copy Bytes, (bits 1-14)+1 raw bytes follow the opcode
+				pData += 2;
+				unsigned short length = ((opcode>>1) & 0x3FFF)+1;
+
+				my_memcpy(pTarget + cursorPosition, pData, (int) length);
+
+				pData += length;
+				cursorPosition += length;
+			}
+			else
+			{
+				opcode = ((opcode>>1)) & 3;	// control opcode selector, bits 1-2
+
+				switch (opcode)
+				{
+				case 0: // Source bank Skip, continue at the next 64K boundary past pDataBaseAddress
+					{
+						int offset = (int)(pData - pDataBaseAddress);
+
+						offset &= 0xFFFF0000;
+						offset += 0x00010000;
+
+						pData = pDataBaseAddress + offset;
+					}
+					break;
+				case 1: // End of frame, the opcode is consumed
+					pData+=2;
+					bDoWork = false;
+					break;
+
+				case 2: // End of Animation
+					// Intentionally, leave cursor alone here (pData is not advanced past the opcode)
+					bDoWork = false;
+					break;
+
+				default:
+					// Reserved / Illegal, stop without consuming the opcode
+					bDoWork = false;
+					break;
+				}
+
+			}
+		}
+
+
+	}
+
+	return (int)(pData - pDataStart);
+}
+
+//------------------------------------------------------------------------------
 
diff --git a/source/gsla_file.h b/source/gsla_file.h
index 70bf6aa..493ea91 100644
--- a/source/gsla_file.h
+++ b/source/gsla_file.h
@@ -110,7 +110,7 @@ typedef struct GSLA_CHUNK
 class GSLAFile
 {
 public:
-	// Load in a C2 File
+	// Load in a GSLA File
 	GSLAFile(const char *pFilePath);
 	~GSLAFile();
 
@@ -127,9 +127,11 @@ public:
 
 	const std::vector<unsigned char*>& GetPixelMaps() { return m_pC1PixelMaps; }
 
+	int DecompressFrame(unsigned char* pTarget, unsigned char* pData, unsigned char* pDataBaseAddress);
+
 private:
 
-	void UnpackInitialFrame(GSLA_INIT* pINIT);
+	void UnpackInitialFrame(GSLA_INIT* pINIT, GSLA_Header* pHeader);
 	void UnpackAnimation(GSLA_ANIM* pANIM, GSLA_Header* pHeader);
 
 
diff --git a/source/lzb.cpp b/source/lzb.cpp
index 98caa18..03afada 100644
--- a/source/lzb.cpp
+++ b/source/lzb.cpp
@@ -29,13 +29,14 @@ static int DictionaryMatch(const DataString& data, int dictionarySize);
 
 // Stuff I need for a faster version
 static DataString LongestMatch(const DataString& data, const DataString& dictionary);
+static DataString LongestMatch(const DataString& data, const DataString& dictionary, int cursorPosition);
 
 //
 //  New Version, still Brute Force, but not as many times
 //
 int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
 {
-	printf("LZB Compress %d bytes\n", sourceSize);
+	//printf("LZB Compress %d bytes\n", sourceSize);
 
 	unsigned char *pOriginalDest = pDest;
 
@@ -103,7 +104,7 @@ int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
 //
 int Old_LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
 {
-	printf("LZB_Compress %d bytes\n", sourceSize);
+	//printf("LZB_Compress %d bytes\n", sourceSize);
 
 	// Initialize Dictionary
 	int bytesInDictionary = 0;		// eventually add the ability to start with the dictionary filled
@@ -345,7 +346,102 @@ DataString LongestMatch(const DataString& data, const DataString& dictionary)
 
 	return result;
 }
+//------------------------------------------------------------------------------
+DataString LongestMatch(const DataString& data, const DataString& dictionary, int cursorPosition)
+{
+	DataString result;	// what we return, stays (nullptr, 0) when nothing matches
+	result.pData = nullptr;
+	result.size = 0;
 
+	// Find the longest matching data in the dictionary (cursorPosition is not used yet)
+	if ((dictionary.size > 0) && (data.size > 0))
+	{
+		DataString candidate;
+		candidate.pData = data.pData;
+		candidate.size = 0;
+
+		// First Check for a pattern / run-length style match
+		// Check the end of the dictionary, to see if this data could be a
+		// pattern "run" (where we can repeat a pattern for X many times for free
+		// using the memcpy with overlapping source/dest buffers)
+		// (This is a dictionary based pattern run/length)
+		{
+			// Check for pattern sizes, start small (capped at 4096 and by both buffer sizes)
+			int max_pattern_size = 4096;
+			if (dictionary.size < max_pattern_size)  max_pattern_size = dictionary.size;
+			if (data.size < max_pattern_size) max_pattern_size = data.size;
+
+			for (int pattern_size = 1; pattern_size <= max_pattern_size; ++pattern_size)
+			{
+				int pattern_start = dictionary.size - pattern_size;	// pattern is the last pattern_size bytes of the dictionary
+
+				for (int dataIndex = 0; dataIndex < data.size; ++dataIndex)
+				{
+					if (data.pData[ dataIndex ] == dictionary.pData[ pattern_start + (dataIndex % pattern_size) ])
+					{
+						candidate.pData = dictionary.pData + pattern_start;
+						candidate.size = dataIndex+1;
+						continue;
+					}
+
+					break;	// first mismatch ends the run for this pattern size
+				}
+
+				//if (candidate.size < pattern_size)
+				//	break;
+
+				if (candidate.size > result.size)
+				{
+					result = candidate;
+				}
+			}
+		}
+
+		// As an optimization
+		int dictionarySize = dictionary.size; // - 1;	// This last string has already been checked by the
+												    // run-length matcher above
+
+		// As the size grows, we're missing potential matches in here
+		// I think the best way to counter this is to attempt something
+		// like KMP
+
+		if (dictionarySize > candidate.size)
+		{
+			// Check the dictionary for a match, brute force
+			for (int dictionaryIndex = 0; dictionaryIndex <= (dictionarySize-candidate.size); ++dictionaryIndex)
+			{
+				int sizeAvailable = dictionarySize - dictionaryIndex;
+
+				if (sizeAvailable > data.size) sizeAvailable = data.size;
+
+				// FIX ME? this could index off the end of the dictionary (though sizeAvailable is clamped above)
+				for (int dataIndex = 0; dataIndex < sizeAvailable; ++dataIndex)
+				{
+					if (data.pData[ dataIndex ] == dictionary.pData[ dictionaryIndex + dataIndex ])
+					{
+						if (dataIndex >= candidate.size)
+						{
+							candidate.pData = dictionary.pData + dictionaryIndex;
+							candidate.size = dataIndex + 1;
+						}
+						continue;
+					}
+
+					break;
+				}
+
+				if (candidate.size > result.size)
+				{
+					result = candidate;
+					//dictionaryIndex = -1;
+					break;	// stops at the first match that beats result, later positions are not examined
+				}
+			}
+		}
+	}
+
+	return result;
+}
 
 //------------------------------------------------------------------------------
 //
@@ -576,6 +672,11 @@ void LZB_Decompress(unsigned char* pDest, unsigned char* pSource, int destSize)
 //------------------------------------------------------------------------------
 //
 // Compress a Frame in the GSLA LZB Format
+// 
+// The dictionary is also the canvas, so when we're finished the dictionary
+// buffer will match the original pSource buffer
+// 
+// If they both match to begin with, we just crap out an End of Frame opcode
 //
 int LZBA_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize,
 				  unsigned char* pDataStart, unsigned char* pDictionary,
@@ -583,6 +684,10 @@ int LZBA_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize,
 {
 //	printf("LZBA Compress %d bytes\n", sourceSize);
 
+	// Used for bank skip opcode emission
+	int bankOffset = (int)((pDest - pDataStart) & 0xFFFF);
+
+	// So we can track how big our compressed data ends up being
 	unsigned char *pOriginalDest = pDest;
 
 	DataString sourceData;
@@ -601,44 +706,95 @@ int LZBA_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize,
 	bool bLastEmitIsLiteral = false;
 	unsigned char* pLastLiteralDest = nullptr;
 
-	while (sourceData.size > 0)
+	int lastEmittedCursorPosition = 0; // This is the default for each frame
+
+	for (int cursorPosition = 0; cursorPosition < dictionarySize;)
 	{
-		candidateData = LongestMatch(sourceData, dictionaryData);
-
-		// If no match, or the match is too small, then take the next byte
-		// and emit as literal
-		if ((0 == candidateData.size)) // || (candidateData.size < 4))
+		if (pSource[ cursorPosition ] != pDictionary[ cursorPosition ])
 		{
-			candidateData.size = 1;
-			candidateData.pData = sourceData.pData;
-		}
+			// Here is some data that has to be processed, so let's decide
+			// how large of a chunk of data we're looking at here
 
-		// Adjust source stream
-		sourceData.pData += candidateData.size;
-		sourceData.size  -= candidateData.size;
+			// Do we need to emit a Skip opcode? Compare cursor to last emit
+			// and emit a skip command if we need it (I'm going to want a gap
+			// of at least 3 bytes before we call it the end)
+			int tempCursorPosition = cursorPosition;
+			int gapCount = 0;
+			for (; tempCursorPosition < dictionarySize; ++tempCursorPosition)
+			{
+				if (pSource[ cursorPosition ] != pDictionary[ cursorPosition ])
+				{
+					gapCount = 0;
+				}
+				else
+				{
+					gapCount++;
+					if (gapCount >= 3)
+						break;
+				}
+			}
 
-		dictionaryData.size = AddDictionary(candidateData, dictionaryData.size);
+			tempCursorPosition -= gapCount;
 
-		if (candidateData.size > 3)
-		{
-			// Emit a dictionary reference
-			pDest += (int)EmitReference(pDest, (int)(candidateData.pData - dictionaryData.pData), candidateData);
-			bLastEmitIsLiteral = false;
-		}
-		else if (bLastEmitIsLiteral)
-		{
-			// Concatenate this literal onto the previous literal
-			pDest += ConcatLiteral(pLastLiteralDest, candidateData);
+			// Now we know from cursorPosition to tempCursorPosition is data
+			// that we want to encode, we either literally copy it, or look
+			// to see if this data is already in the dictionary (so we can copy
+			// it from one part of the frame buffer to another part)
+
+			sourceData.pData = &pSource[ cursorPosition ];
+			sourceData.size = tempCursorPosition - cursorPosition;
+
+			while (sourceData.size > 0)
+			{
+				candidateData = LongestMatch(sourceData, dictionaryData, cursorPosition);
+
+				// If no match, or the match is too small, then take the next byte
+				// and emit as literal
+				if ((0 == candidateData.size)) // || (candidateData.size < 4))
+				{
+					candidateData.size = 1;
+					candidateData.pData = sourceData.pData;
+				}
+
+				// Adjust source stream
+				sourceData.pData += candidateData.size;
+				sourceData.size  -= candidateData.size;
+
+				// Modify the dictionary
+				cursorPosition = AddDictionary(candidateData, cursorPosition);
+				lastEmittedCursorPosition = cursorPosition;
+
+				if (candidateData.size > 3)
+				{
+					// Emit a dictionary reference
+					pDest += (int)EmitReference(pDest, (int)(candidateData.pData - dictionaryData.pData), candidateData);
+					bLastEmitIsLiteral = false;
+				}
+				else if (bLastEmitIsLiteral)
+				{
+					// Concatenate this literal onto the previous literal
+					pDest += ConcatLiteral(pLastLiteralDest, candidateData);
+				}
+				else
+				{
+					// Emit a new literal
+					pLastLiteralDest = pDest;
+					bLastEmitIsLiteral = true;
+					pDest += EmitLiteral(pDest, candidateData);
+				}
+			}
 		}
 		else
 		{
-			// Emit a new literal
-			pLastLiteralDest = pDest;
-			bLastEmitIsLiteral = true;
-			pDest += EmitLiteral(pDest, candidateData);
+			// no change
+			cursorPosition++;
 		}
 	}
 
+	// Emit the End of Frame Opcode
+	*pDest++ = 0x02;
+	*pDest++ = 0x00;
+
 	return (int)(pDest - pOriginalDest);
 
 }