From f80008ca2110e681f1d9b27fb7e32af4990f0d45 Mon Sep 17 00:00:00 2001
From: "JASON-6700K\\jandersen" <JGAndersen@gmail.com>
Date: Thu, 16 Jul 2020 19:05:36 -0400
Subject: [PATCH] faster compressor: WIP

---
 source/lzb.cpp  | 54 +++++++++++++++++++++++++++++++++++--------------
 source/lzb.h    |  1 +
 source/main.cpp |  6 ++++++
 3 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/source/lzb.cpp b/source/lzb.cpp
index 281b02c..87212ef 100644
--- a/source/lzb.cpp
+++ b/source/lzb.cpp
@@ -9,6 +9,7 @@
 #include "bctypes.h"
 
 #define MAX_DICTIONARY_SIZE (32 * 1024)
+#define MAX_STRING_SIZE     (16383)
 //
 // Yes This is a 32K Buffer, of bytes, with no structure to it
 //
@@ -98,7 +99,7 @@ int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
 // This works, but it's stupidly slow, because it uses brute force, and
 // because the brute force starts over everytime I grow the data string
 //
-int old_LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
+int Old_LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
 {
 	printf("LZB_Compress %d bytes\n", sourceSize);
 
@@ -257,7 +258,6 @@ DataString LongestMatch(const DataString& data, const DataString& dictionary)
 
 			for (int pattern_size = 1; pattern_size <= max_pattern_size; ++pattern_size)
 			{
-				bool bMatch = true;
 				int pattern_start = dictionary.size - pattern_size;
 
 				for (int dataIndex = 0; dataIndex < data.size; ++dataIndex)
@@ -269,12 +269,11 @@ DataString LongestMatch(const DataString& data, const DataString& dictionary)
 						continue;
 					}
 
-					bMatch = false;
 					break;
 				}
 
-				if (candidate.size < pattern_size)
-					break;
+				//if (candidate.size < pattern_size)
+				//	break;
 
 				if (candidate.size > result.size)
 				{
@@ -284,34 +283,42 @@ DataString LongestMatch(const DataString& data, const DataString& dictionary)
 		}
 
 		// As an optimization
-		int dictionarySize = dictionary.size - 1;	// This last string has already been checked by, the
+		int dictionarySize = dictionary.size; // - 1;	// This last string has already been checked by, the
 												    // run-length matcher above
 
-		if (dictionarySize >= candidate.size)
+		// As the size grows, we're missing potential matches in here
+		// I think the best way to counter this is to attempt something
+		// like KMP
+
+		if (dictionarySize > candidate.size)
 		{
 			// Check the dictionary for a match, brute force
-			for (int idx = 0; idx <= (dictionarySize-candidate.size); ++idx)
+			for (int dictionaryIndex = 0; dictionaryIndex <= (dictionarySize-candidate.size); ++dictionaryIndex)
 			{
-				bool bMatch = true;
-				for (int dataIdx = 0; dataIdx < data.size; ++dataIdx)
+				int sizeAvailable = dictionarySize - dictionaryIndex;
+
+				if (sizeAvailable > data.size) sizeAvailable = data.size;
+
+				// this could index off the end of the dictionary!!! FIX ME
+				for (int dataIndex = 0; dataIndex < sizeAvailable; ++dataIndex)
 				{
-					if (data.pData[ dataIdx ] == dictionary.pData[ idx + dataIdx ])
+					if (data.pData[ dataIndex ] == dictionary.pData[ dictionaryIndex + dataIndex ])
 					{
-						if (dataIdx > (candidate.size-1))
+						if (dataIndex >= candidate.size)
 						{
-							candidate.pData = dictionary.pData + idx;
-							candidate.size = dataIdx - 1;
+							candidate.pData = dictionary.pData + dictionaryIndex;
+							candidate.size = dataIndex + 1;
 						}
 						continue;
 					}
 
-					bMatch = false;
 					break;
 				}
 
 				if (candidate.size > result.size)
 				{
 					result = candidate;
+					//dictionaryIndex = -1;
 					break;
 				}
 			}
@@ -504,11 +511,16 @@ void LZB_Decompress(unsigned char* pDest, unsigned char* pSource, int destSize)
 {
 	int decompressedBytes = 0;
 
+	unsigned char *pOriginalSource = pSource;
+
 	while (decompressedBytes < destSize)
 	{
 		u16 opcode  = *pSource++;
 		    opcode |= ((u16)(*pSource++))<<8;
 
+		//printf("%04X:", (unsigned int)(pSource-pOriginalSource));
+
+
 		if (opcode & 0x8000)
 		{
 			// Dictionary
@@ -518,8 +530,18 @@ void LZB_Decompress(unsigned char* pDest, unsigned char* pSource, int destSize)
 			u16 offset  = *pSource++;
 			    offset |= ((u16)(*pSource++))<<8;
 
+			const char* overlapped = "";
+
+		   	if ((&pDest[ decompressedBytes ] - &pDest[ offset ]) < opcode)
+		    {
+				overlapped = "pattern";
+			}
+
 			my_memcpy(&pDest[ decompressedBytes ], &pDest[ offset ], opcode);
 			decompressedBytes += opcode;
+
+
+			//printf("%04X:Dic %04X %s\n",decompressedBytes, (unsigned int)opcode, overlapped);
 		}
 		else
 		{
@@ -527,6 +549,8 @@ void LZB_Decompress(unsigned char* pDest, unsigned char* pSource, int destSize)
 			memcpy(&pDest[ decompressedBytes ], pSource, opcode);
 			decompressedBytes += opcode;
 			pSource += opcode;
+
+			//printf("%04X:Lit %04X\n",decompressedBytes, (unsigned int)opcode);
 		}
 	}
 }
diff --git a/source/lzb.h b/source/lzb.h
index 8284aa2..015ba4b 100644
--- a/source/lzb.h
+++ b/source/lzb.h
@@ -8,6 +8,7 @@
 // returns the size of data saved into the pDest Buffer
 //  
 int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize);
+int Old_LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize);
 void LZB_Decompress(unsigned char* pDest, unsigned char* pSource, int destSize);
 
 #endif // LZB_H
diff --git a/source/main.cpp b/source/main.cpp
index fcb0d00..f62b9d4 100644
--- a/source/main.cpp
+++ b/source/main.cpp
@@ -114,14 +114,20 @@ int main(int argc, char* argv[])
 			const std::vector<unsigned char*>& c1Datas = c2data.GetPixelMaps();
 
 			unsigned char workbuffer[64*1024];
+			unsigned char workbuffer2[64*1024];
 
 			for (int idx = 0; idx < frameCount; ++idx)
 			{
+				int oldCompressedSize = Old_LZB_Compress(workbuffer2, c1Datas[ idx ], 32 * 1024);
+				printf("old compressedSize = %d\n", oldCompressedSize);
 				int compressedSize = LZB_Compress(workbuffer, c1Datas[ idx ], 32 * 1024);
 				printf("compressedSize = %d\n", compressedSize);
 
 				unsigned char validationBuffer[ 32 * 1024 ];
 
+				printf("Decompress OLD\n");
+				LZB_Decompress(validationBuffer, workbuffer2, 32 * 1024);
+				printf("Decompress NEW\n");
 				LZB_Decompress(validationBuffer, workbuffer, 32 * 1024);
 
 				if (0 == memcmp(c1Datas[ idx ], validationBuffer, 32*1024))