// // LZB Encode / Decode // #include "lzb.h" #include #include #include "bctypes.h" #include "assert.h" // // This is written specifically for the GSLA, so opcodes emitted are designed // to work with our version of a run/skip/dump // // //Command Word, encoded low-high, what the bits mean: // // xxx_xxxx_xxxx_xxx is the number of bytes 1-16384 to follow (0 == 1 byte) // //%0xxx_xxxx_xxxx_xxx1 - Copy Bytes - straight copy bytes //%1xxx_xxxx_xxxx_xxx1 - Skip Bytes - skip bytes / move the cursor //%1xxx_xxxx_xxxx_xxx0 - Dictionary Copy Bytes from frame buffer to frame buffer // //%0000_0000_0000_0000- Source Skip -> Source pointer skips to next bank of data //%0000_0000_0000_0010- End of Frame - end of frame //%0000_0000_0000_0110- End of Animation / End of File / no more frames // #define MAX_DICTIONARY_SIZE (32 * 1024) #define MAX_STRING_SIZE (16384) // // Yes This is a 32K Buffer, of bytes, with no structure to it // static unsigned char *pGlobalDictionary = nullptr; struct DataString { // Information about the data we're trying to match int size; unsigned char *pData; }; static int AddDictionary(const DataString& data, int dictionarySize); static int EmitLiteral(unsigned char *pDest, DataString& data); static int ConcatLiteral(unsigned char *pDest, DataString& data); static int EmitReference(unsigned char *pDest, int dictionaryOffset, DataString& data); static int DictionaryMatch(const DataString& data, int dictionarySize); // Stuff I need for a faster version static DataString LongestMatch(const DataString& data, const DataString& dictionary); static DataString LongestMatch(const DataString& data, const DataString& dictionary, int cursorPosition); // // New Version, still Brute Force, but not as many times // int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize) { //printf("LZB Compress %d bytes\n", sourceSize); unsigned char *pOriginalDest = pDest; DataString sourceData; DataString dictionaryData; DataString candidateData; // Source Data Stream - will compress until the size is zero sourceData.pData = pSource; sourceData.size = sourceSize; // Remember, this eventually will point at the frame buffer pGlobalDictionary = pSource; dictionaryData.pData = pSource; dictionaryData.size = 0; // dumb last emit is a literal stuff bool bLastEmitIsLiteral = false; unsigned char* pLastLiteralDest = nullptr; while (sourceData.size > 0) { candidateData = LongestMatch(sourceData, dictionaryData); // If no match, or the match is too small, then take the next byte // and emit as literal if ((0 == candidateData.size)) // || (candidateData.size < 4)) { candidateData.size = 1; candidateData.pData = sourceData.pData; } // Adjust source stream sourceData.pData += candidateData.size; sourceData.size -= candidateData.size; dictionaryData.size = AddDictionary(candidateData, dictionaryData.size); if (candidateData.size > 3) { // Emit a dictionary reference pDest += (int)EmitReference(pDest, (int)(candidateData.pData - dictionaryData.pData), candidateData); bLastEmitIsLiteral = false; } else if (bLastEmitIsLiteral) { // Concatenate this literal onto the previous literal pDest += ConcatLiteral(pLastLiteralDest, candidateData); } else { // Emit a new literal pLastLiteralDest = pDest; bLastEmitIsLiteral = true; pDest += EmitLiteral(pDest, candidateData); } } return (int)(pDest - pOriginalDest); } // // This works, but it's stupidly slow, because it uses brute force, and // because the brute force starts over everytime I grow the data string // int Old_LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize) { //printf("LZB_Compress %d bytes\n", sourceSize); // Initialize Dictionary int bytesInDictionary = 0; // eventually add the ability to start with the dictionary filled pGlobalDictionary = pSource; int processedBytes = 0; int bytesEmitted = 0; // dumb last emit is a literal stuff bool bLastEmitIsLiteral = false; int lastEmittedLiteralOffset = 0; DataString candidate_data; candidate_data.pData = pSource; candidate_data.size = 0; int MatchOffset = -1; int PreviousMatchOffset = -1; while (processedBytes < sourceSize) { // Add a byte to the candidate_data, also tally number of processed processedBytes++; candidate_data.size++; // Basic Flow Idea Here // If there's a match, then add to the candidate data, and see if // there's a bigger match (use previous result to speed up search) // else // if there's a previous match, and it's large enough, emit that // else emit what we have as a literal // (KMP is probably the last planned optmization here) PreviousMatchOffset = MatchOffset; MatchOffset = DictionaryMatch(candidate_data, bytesInDictionary); // The dictionary only contains bytes that have been emitted, so we // can't add this byte until we've emitted it? if (MatchOffset < 0) { // Was there a dictionary match // Previous Data, we can't get here with candidate_data.size == 0 // this is an opportunity to use an assert candidate_data.size--; MatchOffset = PreviousMatchOffset; //DictionaryMatch(candidate_data, bytesInDictionary); if ((MatchOffset >= 0) && candidate_data.size > 3) { processedBytes--; bytesInDictionary = AddDictionary(candidate_data, bytesInDictionary); bytesEmitted += EmitReference(pDest + bytesEmitted, MatchOffset, candidate_data); bLastEmitIsLiteral = false; } else { if (0 == candidate_data.size) { candidate_data.size++; } else { processedBytes--; //if (candidate_data.size > 1) //{ // processedBytes -= (candidate_data.size - 1); // candidate_data.size = 1; //} } // Add Dictionary bytesInDictionary = AddDictionary(candidate_data, bytesInDictionary); if (bLastEmitIsLiteral) { // If the last emit was a literal, I want to concatenate // this literal into the previous opcode, to save space bytesEmitted += ConcatLiteral(pDest + lastEmittedLiteralOffset, candidate_data); } else { lastEmittedLiteralOffset = bytesEmitted; bytesEmitted += EmitLiteral(pDest + bytesEmitted, candidate_data); } bLastEmitIsLiteral = true; //MatchOffset = -1; } } } if (candidate_data.size > 0) { int MatchOffset = DictionaryMatch(candidate_data, bytesInDictionary); if ((MatchOffset >=0) && candidate_data.size > 2) { bytesInDictionary = AddDictionary(candidate_data, bytesInDictionary); bytesEmitted += EmitReference(pDest + bytesEmitted, MatchOffset, candidate_data); } else { // Add Dictionary bytesInDictionary = AddDictionary(candidate_data, bytesInDictionary); if (bLastEmitIsLiteral) { // If the last emit was a literal, I want to concatenate // this literal into the previous opcode, to save space bytesEmitted += ConcatLiteral(pDest + lastEmittedLiteralOffset, candidate_data); } else { bytesEmitted += EmitLiteral(pDest + bytesEmitted, candidate_data); } } } return bytesEmitted; } //------------------------------------------------------------------------------ // Return new dictionarySize static int AddDictionary(const DataString& data, int dictionarySize) { int dataIndex = 0; while (dataIndex < data.size) { pGlobalDictionary[ dictionarySize++ ] = data.pData[ dataIndex++ ]; } //dictionarySize += data.size; return dictionarySize; } //------------------------------------------------------------------------------ // // Return longest match of data, in dictionary // DataString LongestMatch(const DataString& data, const DataString& dictionary) { DataString result; result.pData = nullptr; result.size = 0; // Find the longest matching data in the dictionary if ((dictionary.size > 0) && (data.size > 0)) { DataString candidate; candidate.pData = data.pData; candidate.size = 0; // First Check for a pattern / run-length style match // Check the end of the dictionary, to see if this data could be a // pattern "run" (where we can repeat a pattern for X many times for free // using the memcpy with overlapping source/dest buffers) // (This is a dictionary based pattern run/length) { // Check for pattern sizes, start small int max_pattern_size = 4096; if (dictionary.size < max_pattern_size) max_pattern_size = dictionary.size; if (data.size < max_pattern_size) max_pattern_size = data.size; for (int pattern_size = 1; pattern_size <= max_pattern_size; ++pattern_size) { int pattern_start = dictionary.size - pattern_size; for (int dataIndex = 0; dataIndex < data.size; ++dataIndex) { if (data.pData[ dataIndex ] == dictionary.pData[ pattern_start + (dataIndex % pattern_size) ]) { candidate.pData = dictionary.pData + pattern_start; candidate.size = dataIndex+1; continue; } break; } //if (candidate.size < pattern_size) // break; if (candidate.size > result.size) { result = candidate; } } } // As an optimization int dictionarySize = dictionary.size; // - 1; // This last string has already been checked by, the // run-length matcher above // As the size grows, we're missing potential matches in here // I think the best way to counter this is to attempt somthing // like KMP if (dictionarySize > candidate.size) { // Check the dictionary for a match, brute force for (int dictionaryIndex = 0; dictionaryIndex <= (dictionarySize-candidate.size); ++dictionaryIndex) { int sizeAvailable = dictionarySize - dictionaryIndex; if (sizeAvailable > data.size) sizeAvailable = data.size; // this could index off the end of the dictionary!!! FIX ME for (int dataIndex = 0; dataIndex < sizeAvailable; ++dataIndex) { if (data.pData[ dataIndex ] == dictionary.pData[ dictionaryIndex + dataIndex ]) { if (dataIndex >= candidate.size) { candidate.pData = dictionary.pData + dictionaryIndex; candidate.size = dataIndex + 1; } continue; } break; } if (candidate.size > result.size) { result = candidate; //dictionaryIndex = -1; break; } } } } return result; } //------------------------------------------------------------------------------ DataString LongestMatch(const DataString& data, const DataString& dictionary, int cursorPosition) { DataString result; result.pData = nullptr; result.size = 0; // Find the longest matching data in the dictionary if ((dictionary.size > 0) && (data.size > 0)) { DataString candidate; candidate.pData = data.pData; candidate.size = 0; // First Check for a pattern / run-length style match // Check the end of the dictionary, to see if this data could be a // pattern "run" (where we can repeat a pattern for X many times for free // using the memcpy with overlapping source/dest buffers) // (This is a dictionary based pattern run/length) { // Check for pattern sizes, start small int max_pattern_size = 4096; if (cursorPosition < max_pattern_size) max_pattern_size = cursorPosition; if (data.size < max_pattern_size) max_pattern_size = data.size; for (int pattern_size = 1; pattern_size <= max_pattern_size; ++pattern_size) { int pattern_start = cursorPosition - pattern_size; for (int dataIndex = 0; dataIndex < data.size; ++dataIndex) { if (data.pData[ dataIndex ] == dictionary.pData[ pattern_start + (dataIndex % pattern_size) ]) { candidate.pData = dictionary.pData + pattern_start; candidate.size = dataIndex+1; continue; } break; } if (candidate.size > result.size) { result = candidate; } } } // Not getting better than this if (result.size == data.size) return result; // This will keep us from finding matches that we can't use int dictionarySize = cursorPosition; // As the size grows, we're missing potential matches in here // I think the best way to counter this is to attempt somthing // like KMP if (dictionarySize > candidate.size) { // Check the dictionary for a match, brute force for (int dictionaryIndex = 0; dictionaryIndex <= (dictionarySize-candidate.size); ++dictionaryIndex) { int sizeAvailable = dictionarySize - dictionaryIndex; if (sizeAvailable > data.size) sizeAvailable = data.size; // this could index off the end of the dictionary!!! FIX ME for (int dataIndex = 0; dataIndex < sizeAvailable; ++dataIndex) { if (data.pData[ dataIndex ] == dictionary.pData[ dictionaryIndex + dataIndex ]) { if (dataIndex >= candidate.size) { candidate.pData = dictionary.pData + dictionaryIndex; candidate.size = dataIndex + 1; } continue; } break; } if (candidate.size > result.size) { result = candidate; //dictionaryIndex = -1; break; } } } // Not getting better than this if (result.size == data.size) return result; #if 1 // Look for matches beyond the cursor dictionarySize = dictionary.size; if ((dictionarySize-cursorPosition) > candidate.size) { // Check the dictionary for a match, brute force for (int dictionaryIndex = cursorPosition+3; dictionaryIndex <= (dictionarySize-candidate.size); ++dictionaryIndex) { int sizeAvailable = dictionarySize - dictionaryIndex; if (sizeAvailable > data.size) sizeAvailable = data.size; // this could index off the end of the dictionary!!! FIX ME for (int dataIndex = 0; dataIndex < sizeAvailable; ++dataIndex) { if (data.pData[ dataIndex ] == dictionary.pData[ dictionaryIndex + dataIndex ]) { if (dataIndex >= candidate.size) { candidate.pData = dictionary.pData + dictionaryIndex; candidate.size = dataIndex + 1; } continue; } break; } if (candidate.size > result.size) { result = candidate; break; } } } #endif } return result; } //------------------------------------------------------------------------------ // // Return offset into dictionary where the string matches // // -1 means, no match // static int DictionaryMatch(const DataString& data, int dictionarySize) { if( (0 == dictionarySize ) || (0 == data.size) || (data.size > MAX_STRING_SIZE) ) // 16384 is largest string copy we can encode { return -1; } // Check the end of the dictionary, to see if this data could be a // pattern "run" (where we can repeat a pattern for X many times for free // using the memcpy with overlapping source/dest buffers) // (This is a dictionary based pattern run/length) { // Check for pattern sizes, start small int max_pattern_size = 256; if (dictionarySize < max_pattern_size) max_pattern_size = dictionarySize; if (data.size < max_pattern_size) max_pattern_size = data.size; for (int pattern_size = 1; pattern_size <= max_pattern_size; ++pattern_size) { bool bMatch = true; int pattern_start = dictionarySize - pattern_size; for (int dataIndex = 0; dataIndex < data.size; ++dataIndex) { if (data.pData[ dataIndex ] == pGlobalDictionary[ pattern_start + (dataIndex % pattern_size) ]) continue; bMatch = false; break; } if (bMatch) { // Return a RLE Style match result return pattern_start; } } } // As an optimization dictionarySize -= 1; // This last string has already been checked by, the // run-length matcher above if (dictionarySize < data.size) { return -1; } int result = -1; // Check the dictionary for a match, brute force for (int idx = 0; idx <= (dictionarySize-data.size); ++idx) { bool bMatch = true; for (int dataIdx = 0; dataIdx < data.size; ++dataIdx) { if (data.pData[ dataIdx ] == pGlobalDictionary[ idx + dataIdx ]) continue; bMatch = false; break; } if (bMatch) { result = idx; break; } } return result; } //------------------------------------------------------------------------------ // // Emit a literal, that appends itself to an existing literal // static int ConcatLiteral(unsigned char *pDest, DataString& data) { // Return Size int outSize = (int)data.size; int opCode = pDest[0]; opCode |= (int)(((pDest[1])&0x7F)<<8); opCode>>=1; opCode+=1; // opCode contains the length of the literal that's already encoded int skip = opCode; opCode += outSize; // Opcode opCode -= 1; opCode <<=1; opCode |= 1; *pDest++ = (unsigned char)(opCode & 0xFF); *pDest++ = (unsigned char)((opCode >> 8) & 0x7F); pDest += skip; // Literal Data for (int idx = 0; idx < data.size; ++idx) { *pDest++ = data.pData[ idx ]; } // Clear data.pData += data.size; data.size = 0; return outSize; } //------------------------------------------------------------------------------ static int EmitLiteral(unsigned char *pDest, DataString& data) { // Return Size int outSize = 2 + (int)data.size; unsigned short length = (unsigned short)data.size; length -= 1; assert(length < MAX_STRING_SIZE); unsigned short opcode = length<<1; opcode |= 0x0001; // Opcode out *pDest++ = (unsigned char)( opcode & 0xFF ); *pDest++ = (unsigned char)(( opcode>>8)&0xFF); // Literal Data for (int idx = 0; idx < data.size; ++idx) { *pDest++ = data.pData[ idx ]; } // Clear data.pData += data.size; data.size = 0; return outSize; } //------------------------------------------------------------------------------ static int EmitReference(unsigned char *pDest, int dictionaryOffset, DataString& data) { // Return Size int outSize = 2 + 2; unsigned short length = (unsigned short)data.size; length -= 1; assert(length < MAX_STRING_SIZE); unsigned short opcode = length<<1; opcode |= 0x8000; // Opcode out *pDest++ = (unsigned char)( opcode & 0xFF ); *pDest++ = (unsigned char)(( opcode>>8)&0xFF); // Destination Address out unsigned short address = (unsigned short)dictionaryOffset; address += 0x2000; // So we don't have to add $2000 in the animation player *pDest++ = (unsigned char)(address & 0xFF); *pDest++ = (unsigned char)((address>>8)&0xFF); // Clear data.pData += data.size; data.size = 0; return outSize; } //------------------------------------------------------------------------------ // // Std C memcpy seems to be stopping the copy from happening, when I overlap // the buffer to get a pattern run copy (overlapped buffers) // static void my_memcpy(u8* pDest, u8* pSrc, int length) { while (length-- > 0) { *pDest++ = *pSrc++; } } //------------------------------------------------------------------------------ // // Emit one or more Cursor Skip forward opcode // int EmitSkip(unsigned char* pDest, int skipSize) { int outSize = 0; int thisSkip = 0; while (skipSize > 0) { outSize+=2; thisSkip = skipSize; if (thisSkip > MAX_STRING_SIZE) { thisSkip = MAX_STRING_SIZE; } skipSize -= thisSkip; unsigned short length = (unsigned short)thisSkip; length -= 1; assert(length < MAX_STRING_SIZE); unsigned short opcode = length<<1; opcode |= 0x8001; // Opcode out *pDest++ = (unsigned char)( opcode & 0xFF ); *pDest++ = (unsigned char)(( opcode>>8)&0xFF); } return outSize; } //------------------------------------------------------------------------------ // // Forcibly Emit a source Skip Opcode // // return space_left_in_Bank // int EmitSourceSkip(unsigned char*& pDest, int space_left_in_bank) { assert(space_left_in_bank >= 2); *pDest++ = 0; *pDest++ = 0; space_left_in_bank-=2; while (space_left_in_bank) { space_left_in_bank--; *pDest++ = 0; } return 0x10000; } //------------------------------------------------------------------------------ // // Conditionally shit out the Source Bank Skip // int CheckEmitSourceSkip(int checkSpace, unsigned char*& pDest, int space_left_in_bank) { if ((checkSpace+2) > space_left_in_bank) { return EmitSourceSkip(pDest, space_left_in_bank); } space_left_in_bank -= checkSpace; return space_left_in_bank; } //------------------------------------------------------------------------------ // // Compress a Frame in the GSLA LZB Format // // The dictionary is also the canvas, so when we're finished the dictionary // buffer will match the original pSource buffer // // If they both match to begin with, we just crap out an End of Frame opcode // int LZBA_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize, unsigned char* pDataStart, unsigned char* pDictionary, int dictionarySize) { // printf("LZBA Compress %d bytes\n", sourceSize); pGlobalDictionary = pDictionary; // Used for bank skip opcode emission int bankOffset = (int)((pDest - pDataStart) & 0xFFFF); // So we can track how big our compressed data ends up being unsigned char *pOriginalDest = pDest; DataString sourceData; DataString dictionaryData; DataString candidateData; // Source Data Stream - will compress until the size is zero sourceData.pData = pSource; sourceData.size = sourceSize; // Dictionary is the Frame Buffer dictionaryData.pData = pDictionary; dictionaryData.size = dictionarySize; // dumb last emit is a literal stuff bool bLastEmitIsLiteral = false; unsigned char* pLastLiteralDest = nullptr; int lastEmittedCursorPosition = 0; // This is the default for each frame int space_left_in_bank = (int)0x10000 - (int)((pDest - pDataStart)&0xFFFF); space_left_in_bank = CheckEmitSourceSkip(0, pDest, space_left_in_bank); for (int cursorPosition = 0; cursorPosition < dictionarySize;) { if (pSource[ cursorPosition ] != pDictionary[ cursorPosition ]) { // Here is some data that has to be processed, so let's decide // how large of a chunk of data we're looking at here // Do we need to emit a Skip opcode?, compare cursor to last emit // and emit a skip command if we need it (I'm going want a gap of // at least 3 bytes? before we call it the end int skipSize = cursorPosition - lastEmittedCursorPosition; if (skipSize) { int numSkips = (skipSize / MAX_STRING_SIZE) + 1; space_left_in_bank = CheckEmitSourceSkip(2 * numSkips, pDest, space_left_in_bank); // We need to Skip pDest += EmitSkip(pDest, skipSize); bLastEmitIsLiteral = false; lastEmittedCursorPosition = cursorPosition; } int tempCursorPosition = cursorPosition; int gapCount = 0; for (; tempCursorPosition < dictionarySize; ++tempCursorPosition) { if (pSource[ tempCursorPosition ] != pDictionary[ tempCursorPosition ]) { gapCount = 0; } else { // if there's a small amount of matching data, let's include // it in the clump (try and reduce opcode emissions) if (gapCount >= 3) break; gapCount++; } } tempCursorPosition -= gapCount; // Now we know from cursorPosition to tempCursorPosition is data // that we want to encode, we either literally copy it, or look // to see if this data is already in the dictionary (so we can copy // it from one part of the frame buffer to another part) sourceData.pData = &pSource[ cursorPosition ]; sourceData.size = tempCursorPosition - cursorPosition; #if 0 // This Works //-------------------------- Dump, so skip dump only space_left_in_bank = CheckEmitSourceSkip(2+sourceData.size, pDest, space_left_in_bank); cursorPosition = AddDictionary(sourceData, cursorPosition); pDest += EmitLiteral(pDest, sourceData); lastEmittedCursorPosition = cursorPosition; #endif while (sourceData.size > 0) { candidateData = LongestMatch(sourceData, dictionaryData, cursorPosition); // If no match, or the match is too small, then take the next byte // and emit as literal if ((0 == candidateData.size)) // || (candidateData.size < 4)) { candidateData.size = 1; candidateData.pData = sourceData.pData; } // Adjust source stream sourceData.pData += candidateData.size; sourceData.size -= candidateData.size; // Modify the dictionary cursorPosition = AddDictionary(candidateData, cursorPosition); lastEmittedCursorPosition = cursorPosition; if (candidateData.size > 3) { space_left_in_bank = CheckEmitSourceSkip(4, pDest, space_left_in_bank); // Emit a dictionary reference pDest += (int)EmitReference(pDest, (int)(candidateData.pData - dictionaryData.pData), candidateData); bLastEmitIsLiteral = false; } else if (bLastEmitIsLiteral) { // This is a problem for the source bank skip, we can't // concatenate if we end up injecting a source bank skip opcode // into the stream... what to do???, if insert, we will need to // do a "normal" literal emission, ugly int space = CheckEmitSourceSkip(candidateData.size, pDest, space_left_in_bank); if (space != (space_left_in_bank - candidateData.size)) { space_left_in_bank = space-2; // Emit a new literal pLastLiteralDest = pDest; pDest += EmitLiteral(pDest, candidateData); } else { // Concatenate this literal onto the previous literal space_left_in_bank = space; pDest += ConcatLiteral(pLastLiteralDest, candidateData); } } else { space_left_in_bank = CheckEmitSourceSkip(2 + candidateData.size, pDest, space_left_in_bank); // Emit a new literal pLastLiteralDest = pDest; bLastEmitIsLiteral = true; pDest += EmitLiteral(pDest, candidateData); } } } else { // no change cursorPosition++; } } space_left_in_bank = CheckEmitSourceSkip(2, pDest, space_left_in_bank); // Emit the End of Frame Opcode *pDest++ = 0x02; *pDest++ = 0x00; for (int idx = 0; idx < dictionarySize; ++idx) { if (pSource[ idx ] != pDictionary[ idx ]) { assert(0); } } return (int)(pDest - pOriginalDest); } //------------------------------------------------------------------------------