Maconv/src/stuffit/utils/huffman.cc

276 lines
7.4 KiB
C++

/*
Huffman decoder.
The code in this file is based on TheUnarchiver.
See README.md and docs/licenses/TheUnarchiver.txt for more information.
Copyright (C) 2019, Guillaume Gonnet
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "stuffit/utils/huffman.h"
#include "stuffit/methods.h"
#include <make_unique.hpp>
#include <limits>
namespace maconv {
namespace stuffit {
// Reverse a 32bits integer.
static uint32_t Reverse32(uint32_t val)
{
val = ((val >> 1) & 0x55555555) | ((val & 0x55555555) << 1);
val = ((val >> 2) & 0x33333333) | ((val & 0x33333333) << 2);
val = ((val >> 4) & 0x0F0F0F0F) | ((val & 0x0F0F0F0F) << 4);
val = ((val >> 8) & 0x00FF00FF) | ((val & 0x00FF00FF) << 8);
return (val >> 16) | (val << 16);
}
// Reverse a Nbits integer.
static uint32_t ReverseN(uint32_t val, int length)
{
return Reverse32(val) >> (32 - length);
}
// Copy an Huffman decoder.
HuffmanDecoder &HuffmanDecoder::operator=(const HuffmanDecoder &other)
{
tree = other.tree;
num_entries = other.num_entries;
min_length = other.min_length;
max_length = other.max_length;
return *this;
}
// Initialize the decoder.
void HuffmanDecoder::Initialize()
{
NewNode();
num_entries = 1;
min_length = std::numeric_limits<int>::max();
max_length = std::numeric_limits<int>::min();
}
// Initialize the decoder (with some lengths).
void HuffmanDecoder::Initialize(const int *lengths, int numsymbols, int maxcodelength,
bool zeros)
{
Initialize();
int code = 0, symbolsleft = numsymbols;
for (int length = 1; length <= maxcodelength; length++) {
for (int i = 0; i < numsymbols; i++) {
if (lengths[i] != length) continue;
AddValue(i, zeros ? code : ~code, length);
code++;
if (--symbolsleft == 0) return; // Early exit if all codes have been handled.
}
code <<= 1;
}
}
// Add a new value in the decoder.
void HuffmanDecoder::AddValue(int value, uint32_t code, int length)
{
AddValue(value, code, length, length);
}
// Add a new value in the decoder.
void HuffmanDecoder::AddValue(int value, uint32_t code, int length, int repeat_pos)
{
if (length > max_length) max_length = length;
if (length < min_length) min_length = length;
repeat_pos = length - 1 - repeat_pos;
int last_node = 0;
int codest = ((code >> (repeat_pos - 1)) & 3);
if (repeat_pos == 0 || (repeat_pos >= 0 && (codest == 0 || codest == 3)))
throw ExtractException("Huffman: invalid repeat position");
for (int bitpos = length - 1; bitpos >= 0; bitpos--) {
int bit = (code >> bitpos) & 1;
if (IsLeafNode(last_node))
throw ExtractException("Huffman: prefix already exists");
if (bitpos == repeat_pos) {
if (!IsOpenBranch(last_node, bit))
throw ExtractException("Huffman: invalid repeating code");
int repeat_node = NewNode();
int next_node = NewNode();
SetBranch(last_node, bit, repeat_node);
SetBranch(repeat_node, bit, repeat_node);
SetBranch(repeat_node, bit ^ 1, next_node);
last_node = next_node;
bitpos++; // Terminating bit already handled, skip it.
}
else {
if (IsOpenBranch(last_node, bit))
SetBranch(last_node, bit, NewNode());
last_node = Branch(last_node, bit);
}
}
if (!IsEmptyNode(last_node))
throw ExtractException("Huffman: prefix already exists");
SetLeafValue(last_node, value);
}
// Add a new value in the decoder (low bit first).
void HuffmanDecoder::AddValueLF(int value, uint32_t code, int length)
{
AddValue(value, ReverseN(code, length), length, length);
}
// Add a new value in the decoder (low bit first).
void HuffmanDecoder::AddValueLF(int value, uint32_t code, int length, int repeat_pos)
{
AddValue(value, ReverseN(code, length), length, repeat_pos);
}
// Get the next symbol from a bit reader.
int HuffmanDecoder::NextSymbol(utils::BitReader &input)
{
if (!table)
throw ExtractException("Huffman: search table not built");
int bits = input.ReadWord(table_size, false);
int length = table[bits].length;
int value = table[bits].value;
if (length < 0)
throw ExtractException("Huffman: invalid prefix code when getting next symbol [length]");
if (length <= table_size) {
input.SkipBits(length);
return value;
}
input.SkipBits(table_size);
int node = value;
for (int bit; !IsLeafNode(node); node = Branch(node, bit)) {
bit = input.ReadBit();
if (IsOpenBranch(node, bit))
throw ExtractException("Huffman: invalid prefix code when getting next symbol [code]");
}
return LeafValue(node);
}
// Make search table (Little Endian).
void HuffmanDecoder::MakeTableRecursLE(int node, HuffmanTableEntry *table,
int depth)
{
int curr_table_size = (1 << (table_size - depth));
int curr_stride = (1 << depth);
if (IsInvalidNode(node)) {
for (int i = 0; i < curr_table_size; i++)
table[i * curr_stride].length = -1;
}
else if (IsLeafNode(node)) {
for (int i = 0; i < curr_table_size; i++) {
table[i * curr_stride].length = depth;
table[i * curr_stride].value = LeafValue(node);
}
}
else {
if (depth == table_size) {
table[0].length = table_size + 1;
table[0].value = node;
} else {
MakeTableRecursLE(LeftBranch(node), table, depth + 1);
MakeTableRecursLE(RightBranch(node), table + curr_stride, depth + 1);
}
}
}
// Make search table (Big Endian).
void HuffmanDecoder::MakeTableRecursBE(int node, HuffmanTableEntry *table,
int depth)
{
int curr_table_size = (1 << (table_size - depth));
if (IsInvalidNode(node)) {
for (int i = 0; i < curr_table_size; i++)
table[i].length = -1;
}
else if (IsLeafNode(node)) {
for(int i = 0; i < curr_table_size; i++) {
table[i].length = depth;
table[i].value = LeafValue(node);
}
}
else {
if (depth == table_size) {
table[0].length = table_size + 1;
table[0].value = node;
} else {
MakeTableRecursBE(LeftBranch(node), table, depth + 1);
MakeTableRecursBE(RightBranch(node), table + curr_table_size/2, depth + 1);
}
}
}
// Make the search table.
void HuffmanDecoder::MakeTable(bool is_LE)
{
constexpr int kMaxTableSize = 10;
if (max_length < min_length) table_size = kMaxTableSize;
else if (max_length >= kMaxTableSize) table_size = kMaxTableSize;
else table_size = max_length;
table = std::make_unique<HuffmanTableEntry[]>(1 << table_size);
if (is_LE) MakeTableRecursLE(0, table.get(), 0);
else MakeTableRecursBE(0, table.get(), 0);
}
} // namespace stuffit
} // namespace maconv