Retro68/gcc/libgo/go/compress/bzip2/bzip2.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package bzip2 implements bzip2 decompression.
package bzip2

import "io"

// There's no RFC for bzip2. I used the Wikipedia page for reference and a lot
// of guessing: http://en.wikipedia.org/wiki/Bzip2
// The source code to pyflate was useful for debugging:
// http://www.paul.sladen.org/projects/pyflate

// StructuralError is the error type returned when the bzip2 data is found to
// be syntactically invalid. Its string value describes what was wrong.
type StructuralError string

// Error implements the error interface. The description is prefixed with a
// fixed marker so callers can tell the message came from the bzip2 decoder.
func (s StructuralError) Error() string {
	const prefix = "bzip2 data invalid: "
	return prefix + string(s)
}

// A reader decompresses bzip2 compressed data.
//
// It holds the bit-level input reader, the checksums being tracked for the
// current block and stream, the buffers used to decode one block, and the
// state of the on-demand run-length decoder that feeds Read.
type reader struct {
	// br reads the compressed input bit by bit; it is created by
	// newBitReader in NewReader.
	br           bitReader
	// fileCRC is the combined checksum of the current stream. readBlock
	// folds each block's stored CRC into it, and read compares it with the
	// value stored after the final magic.
	fileCRC      uint32
	// blockCRC is the CRC of the bytes output so far from the current block.
	blockCRC     uint32
	// wantBlockCRC is the CRC stored in the current block's header.
	wantBlockCRC uint32
	setupDone    bool // true if we have parsed the bzip2 header.
	blockSize    int  // maximum block size in bytes; setup computes it as 100 * 1024 * level, e.g. 900 * 1024 for level '9'.
	// eof is set once the input ends cleanly after a stream's final CRC;
	// Read then returns io.EOF without touching the input again.
	eof          bool
	buf          []byte    // NOTE(review): described as storing Burrows-Wheeler transformed data, but no code visible in this file reads or writes it; block data is kept in tt.
	c            [256]uint // the `C' array for the inverse BWT.
	tt           []uint32  // mirrors the `tt' array in the bzip2 source and contains the P array in the upper 24 bits.
	tPos         uint32    // index into tt (via preRLE) of the entry holding the next output byte.

	preRLE      []uint32 // contains the RLE data still to be processed; a prefix of tt set up by readBlock.
	preRLEUsed  int      // number of entries of preRLE used.
	lastByte    int      // the last byte value seen, or -1 if there is none.
	byteRepeats uint     // the number of repeats of lastByte seen.
	repeats     uint     // the number of copies of lastByte to output.
}

// NewReader wraps r in a bzip2 decompressor and returns it as an io.Reader.
// If r does not also implement io.ByteReader,
// the decompressor may read more data than necessary from r.
func NewReader(r io.Reader) io.Reader {
	return &reader{br: newBitReader(r)}
}

// Magic numbers that delimit the parts of a bzip2 stream.
const (
	bzip2FileMagic  = 0x425a         // "BZ", the 16 bits that open a stream.
	bzip2BlockMagic = 0x314159265359 // 48-bit marker that precedes a compressed block.
	bzip2FinalMagic = 0x177245385090 // 48-bit marker that precedes the end-of-stream CRC.
)

// setup parses the bzip2 stream header and prepares the reader for the blocks
// that follow. When needMagic is true the leading "BZ" is read and checked
// first; otherwise the caller has already consumed it. The header must then
// contain 'h' followed by a level digit '1'..'9', which determines blockSize.
// The stream checksum is reset and tt is grown if it is too small for the
// new block size. A StructuralError is returned for a malformed header.
func (bz2 *reader) setup(needMagic bool) error {
	bits := &bz2.br

	if needMagic && bits.ReadBits(16) != bzip2FileMagic {
		return StructuralError("bad magic value")
	}

	if bits.ReadBits(8) != 'h' {
		return StructuralError("non-Huffman entropy encoding")
	}

	digit := bits.ReadBits(8)
	if !(digit >= '1' && digit <= '9') {
		return StructuralError("invalid compression level")
	}

	size := 100 * 1024 * (int(digit) - '0')
	bz2.fileCRC = 0
	bz2.blockSize = size
	// Keep an existing tt when it is already large enough.
	if len(bz2.tt) < size {
		bz2.tt = make([]uint32, size)
	}
	return nil
}

// Read implements io.Reader. It decompresses up to len(buf) bytes into buf
// and returns how many were written. The stream header is parsed on the first
// call. Whenever the bit reader has recorded an error, that error is returned
// in place of the one produced by header parsing or decoding. Once the end of
// the input has been reached, Read returns 0, io.EOF.
func (bz2 *reader) Read(buf []byte) (n int, err error) {
	if bz2.eof {
		return 0, io.EOF
	}

	if !bz2.setupDone {
		err = bz2.setup(true)
		// An error from the underlying bit reader takes precedence over
		// the structural error setup may have derived from bad bits.
		if ioErr := bz2.br.Err(); ioErr != nil {
			err = ioErr
		}
		if err != nil {
			return 0, err
		}
		bz2.setupDone = true
	}

	n, err = bz2.read(buf)
	if ioErr := bz2.br.Err(); ioErr != nil {
		err = ioErr
	}
	return n, err
}

// readFromBlock copies decoded bytes of the current block into buf and
// returns how many were written. It returns 0 when buf is empty or when the
// current block has no more output to give.
func (bz2 *reader) readFromBlock(buf []byte) int {
	// bzip2 is a block based compressor, except that it has a run-length
	// preprocessing step. The block based nature means that we can
	// preallocate fixed-size buffers and reuse them. However, the RLE
	// preprocessing would require allocating huge buffers to store the
	// maximum expansion. Thus we process blocks all at once, except for
	// the RLE which we decompress as required.
	//
	// The run-length encoding works like this: any sequence of four equal
	// bytes is followed by a length byte which contains the number of
	// repeats of that byte to include. (The number of repeats can be
	// zero.) Because we are decompressing on-demand our state is kept in
	// the reader object.
	written := 0
	for written < len(buf) {
		if bz2.repeats == 0 && bz2.preRLEUsed >= len(bz2.preRLE) {
			// Nothing pending: the block is exhausted.
			break
		}

		if bz2.repeats > 0 {
			// Still emitting copies requested by an earlier length byte.
			buf[written] = byte(bz2.lastByte)
			written++
			bz2.repeats--
			if bz2.repeats == 0 {
				bz2.lastByte = -1
			}
			continue
		}

		// Follow the chain: the low 8 bits are the byte value and the
		// upper 24 bits are the index of the next entry.
		entry := bz2.preRLE[bz2.tPos]
		sym := byte(entry)
		bz2.tPos = entry >> 8
		bz2.preRLEUsed++

		if bz2.byteRepeats == 3 {
			// Four equal bytes were just output, so this byte is a
			// repeat count rather than data.
			bz2.repeats = uint(sym)
			bz2.byteRepeats = 0
			continue
		}

		if int(sym) != bz2.lastByte {
			bz2.byteRepeats = 0
		} else {
			bz2.byteRepeats++
		}
		bz2.lastByte = int(sym)

		buf[written] = sym
		written++
	}

	return written
}

// read fills buf with decompressed data, moving on to the next block (or the
// next concatenated stream) whenever the current block is exhausted.
//
// It returns as soon as any bytes are available, after folding them into the
// running block CRC. A zero-length buf returns 0, nil immediately and leaves
// the decoder state untouched. When a block ends, its CRC is verified before
// the next 48-bit magic is read:
//   - the block magic starts a new block via readBlock;
//   - the final magic is followed by the stream CRC, which is verified, and
//     then either the end of the input (io.EOF) or the "BZ" header of a
//     concatenated stream;
//   - anything else is a StructuralError.
//
// Checksum failures and input errors are also recorded in bz2.br.err.
func (bz2 *reader) read(buf []byte) (int, error) {
	for {
		n := bz2.readFromBlock(buf)
		// With a zero-length buf, readFromBlock reports 0 even in the
		// middle of a block. Returning here keeps that from being mistaken
		// for end-of-block, which would compare a partial CRC against the
		// expected one and latch a spurious checksum error.
		if n > 0 || len(buf) == 0 {
			bz2.blockCRC = updateCRC(bz2.blockCRC, buf[:n])
			return n, nil
		}

		// End of block. Check CRC.
		if bz2.blockCRC != bz2.wantBlockCRC {
			bz2.br.err = StructuralError("block checksum mismatch")
			return 0, bz2.br.err
		}

		// Find next block.
		br := &bz2.br
		switch br.ReadBits64(48) {
		default:
			return 0, StructuralError("bad magic value found")

		case bzip2BlockMagic:
			// Start of block.
			err := bz2.readBlock()
			if err != nil {
				return 0, err
			}

		case bzip2FinalMagic:
			// Check end-of-file CRC.
			wantFileCRC := uint32(br.ReadBits64(32))
			if br.err != nil {
				return 0, br.err
			}
			if bz2.fileCRC != wantFileCRC {
				br.err = StructuralError("file checksum mismatch")
				return 0, br.err
			}

			// Skip ahead to byte boundary.
			// Is there a file concatenated to this one?
			// It would start with BZ.
			if br.bits%8 != 0 {
				br.ReadBits(br.bits % 8)
			}
			b, err := br.r.ReadByte()
			if err == io.EOF {
				// Clean end of input right after a complete stream.
				br.err = io.EOF
				bz2.eof = true
				return 0, io.EOF
			}
			if err != nil {
				br.err = err
				return 0, err
			}
			z, err := br.r.ReadByte()
			if err != nil {
				// One trailing byte is not a valid header: the input
				// was cut short.
				if err == io.EOF {
					err = io.ErrUnexpectedEOF
				}
				br.err = err
				return 0, err
			}
			if b != 'B' || z != 'Z' {
				return 0, StructuralError("bad magic value in continuation file")
			}
			// The magic has been consumed byte-wise above, so setup
			// must not look for it again.
			if err := bz2.setup(false); err != nil {
				return 0, err
			}
		}
	}
}

// readBlock reads a bzip2 block. The magic number should already have been consumed.
//
// It parses the block header (CRC, randomized flag, origPtr, symbol bitmap,
// Huffman tree count, selectors and code lengths), then Huffman-decodes the
// block into bz2.tt while undoing the move-to-front and RUNA/RUNB run-length
// coding, and finally runs the inverse BWT so that readFromBlock can emit the
// data. On success the per-block state (preRLE, preRLEUsed, tPos, lastByte,
// byteRepeats, repeats, blockCRC) has been reset for the new block.
// A StructuralError is returned when the block is malformed; errors from
// newHuffmanTree are passed through.
func (bz2 *reader) readBlock() (err error) {
	br := &bz2.br
	bz2.wantBlockCRC = uint32(br.ReadBits64(32)) // CRC stored for this block; read compares it with blockCRC once the block has been output.
	bz2.blockCRC = 0
	// Fold the block's stored CRC into the stream checksum: rotate the
	// running value left by one bit and XOR in the block CRC.
	bz2.fileCRC = (bz2.fileCRC<<1 | bz2.fileCRC>>31) ^ bz2.wantBlockCRC
	randomized := br.ReadBits(1)
	if randomized != 0 {
		return StructuralError("deprecated randomized files")
	}
	// origPtr is handed to inverseBWT below to locate the first output byte.
	origPtr := uint(br.ReadBits(24))

	// If not every byte value is used in the block (i.e., it's text) then
	// the symbol set is reduced. The symbols used are stored as a
	// two-level, 16x16 bitmap.
	symbolRangeUsedBitmap := br.ReadBits(16)
	symbolPresent := make([]bool, 256)
	numSymbols := 0
	for symRange := uint(0); symRange < 16; symRange++ {
		if symbolRangeUsedBitmap&(1<<(15-symRange)) != 0 {
			bits := br.ReadBits(16)
			for symbol := uint(0); symbol < 16; symbol++ {
				if bits&(1<<(15-symbol)) != 0 {
					symbolPresent[16*symRange+symbol] = true
					numSymbols++
				}
			}
		}
	}

	if numSymbols == 0 {
		// There must be an EOF symbol.
		return StructuralError("no symbols in input")
	}

	// A block uses between two and six different Huffman trees.
	numHuffmanTrees := br.ReadBits(3)
	if numHuffmanTrees < 2 || numHuffmanTrees > 6 {
		return StructuralError("invalid number of Huffman trees")
	}

	// The Huffman tree can switch every 50 symbols so there's a list of
	// tree indexes telling us which tree to use for each 50 symbol block.
	numSelectors := br.ReadBits(15)
	treeIndexes := make([]uint8, numSelectors)

	// The tree indexes are move-to-front transformed and stored as unary
	// numbers.
	mtfTreeDecoder := newMTFDecoderWithRange(numHuffmanTrees)
	for i := range treeIndexes {
		// Count the 1 bits up to the terminating 0 bit.
		c := 0
		for {
			inc := br.ReadBits(1)
			if inc == 0 {
				break
			}
			c++
		}
		if c >= numHuffmanTrees {
			return StructuralError("tree index too large")
		}
		treeIndexes[i] = uint8(mtfTreeDecoder.Decode(c))
	}

	// The list of symbols for the move-to-front transform is taken from
	// the previously decoded symbol bitmap.
	symbols := make([]byte, numSymbols)
	nextSymbol := 0
	for i := 0; i < 256; i++ {
		if symbolPresent[i] {
			symbols[nextSymbol] = byte(i)
			nextSymbol++
		}
	}
	mtf := newMTFDecoder(symbols)

	numSymbols += 2 // to account for RUNA and RUNB symbols
	huffmanTrees := make([]huffmanTree, numHuffmanTrees)

	// Now we decode the arrays of code-lengths for each tree.
	lengths := make([]uint8, numSymbols)
	for i := range huffmanTrees {
		// The code lengths are delta encoded from a 5-bit base value.
		length := br.ReadBits(5)
		for j := range lengths {
			// Each adjustment is two bits: a 1 bit to continue, then
			// 1 to decrement or 0 to increment. A leading 0 bit ends
			// the adjustments for this symbol.
			for {
				if !br.ReadBit() {
					break
				}
				if br.ReadBit() {
					length--
				} else {
					length++
				}
			}
			if length < 0 || length > 20 {
				return StructuralError("Huffman length out of range")
			}
			lengths[j] = uint8(length)
		}
		huffmanTrees[i], err = newHuffmanTree(lengths)
		if err != nil {
			return err
		}
	}

	selectorIndex := 1 // the next tree index to use
	if len(treeIndexes) == 0 {
		return StructuralError("no tree selectors given")
	}
	if int(treeIndexes[0]) >= len(huffmanTrees) {
		return StructuralError("tree selector out of range")
	}
	currentHuffmanTree := huffmanTrees[treeIndexes[0]]
	bufIndex := 0 // indexes bz2.tt, where the decoded bytes of the block are stored.
	// The output of the move-to-front transform is run-length encoded and
	// we merge the decoding into the Huffman parsing loop. These two
	// variables accumulate the repeat count. See the Wikipedia page for
	// details.
	repeat := 0
	repeat_power := 0

	// The `C' array (used by the inverse BWT) needs to be zero initialized.
	for i := range bz2.c {
		bz2.c[i] = 0
	}

	decoded := 0 // counts the number of symbols decoded by the current tree.
	for {
		if decoded == 50 {
			// Time to switch to the tree named by the next selector.
			if selectorIndex >= numSelectors {
				return StructuralError("insufficient selector indices for number of symbols")
			}
			if int(treeIndexes[selectorIndex]) >= len(huffmanTrees) {
				return StructuralError("tree selector out of range")
			}
			currentHuffmanTree = huffmanTrees[treeIndexes[selectorIndex]]
			selectorIndex++
			decoded = 0
		}

		v := currentHuffmanTree.Decode(br)
		decoded++

		if v < 2 {
			// This is either the RUNA or RUNB symbol.
			if repeat == 0 {
				repeat_power = 1
			}
			repeat += repeat_power << v
			repeat_power <<= 1

			// This limit of 2 million comes from the bzip2 source
			// code. It prevents repeat from overflowing.
			if repeat > 2*1024*1024 {
				return StructuralError("repeat count too large")
			}
			continue
		}

		if repeat > 0 {
			// We have decoded a complete run-length so we need to
			// replicate the last output symbol.
			if repeat > bz2.blockSize-bufIndex {
				return StructuralError("repeats past end of block")
			}
			for i := 0; i < repeat; i++ {
				b := byte(mtf.First())
				bz2.tt[bufIndex] = uint32(b)
				bz2.c[b]++
				bufIndex++
			}
			repeat = 0
		}

		if int(v) == numSymbols-1 {
			// This is the EOF symbol. Because it's always at the
			// end of the move-to-front list, and never gets moved
			// to the front, it has this unique value.
			break
		}

		// Since two metasymbols (RUNA and RUNB) have values 0 and 1,
		// one would expect |v-2| to be passed to the MTF decoder.
		// However, the front of the MTF list is never referenced as 0,
		// it's always referenced with a run-length of 1. Thus 0
		// doesn't need to be encoded and we have |v-1| in the next
		// line.
		b := byte(mtf.Decode(int(v - 1)))
		if bufIndex >= bz2.blockSize {
			return StructuralError("data exceeds block size")
		}
		bz2.tt[bufIndex] = uint32(b)
		bz2.c[b]++
		bufIndex++
	}

	// origPtr indexes the decoded data, so it must lie inside it. This
	// also rejects a block that decoded to no bytes at all.
	if origPtr >= uint(bufIndex) {
		return StructuralError("origPtr out of bounds")
	}

	// We have completed the entropy decoding. Now we can perform the
	// inverse BWT and setup the RLE buffer.
	bz2.preRLE = bz2.tt[:bufIndex]
	bz2.preRLEUsed = 0
	bz2.tPos = inverseBWT(bz2.preRLE, origPtr, bz2.c[:])
	bz2.lastByte = -1
	bz2.byteRepeats = 0
	bz2.repeats = 0

	return nil
}

// inverseBWT performs the inverse Burrows-Wheeler transform described in
// http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf, section 4.2.
// In that document, origPtr is called `I' and c is the `C' array after the
// first pass over the data. c is a parameter because the caller merges that
// first pass (counting byte occurrences) with the Huffman decoding.
//
// This follows the `single array' method from the bzip2 source code: on
// return each element of tt still holds its byte in the bottom 8 bits, and
// the top 24 bits hold the index of the element that comes next in the
// output. The index of the first output element is returned. Both tt and c
// are modified in place; c must have at least 256 entries.
func inverseBWT(tt []uint32, origPtr uint, c []uint) uint32 {
	// Turn the per-byte counts into starting offsets: after this loop
	// c[b] is the number of bytes in tt that are smaller than b.
	var offset uint
	for sym := 0; sym < 256; sym++ {
		count := c[sym]
		c[sym] = offset
		offset += count
	}

	// Link every element to its successor by storing the index in the
	// upper 24 bits of the slot reserved for that byte value.
	for i, entry := range tt {
		sym := entry & 0xff
		tt[c[sym]] |= uint32(i) << 8
		c[sym]++
	}

	return tt[origPtr] >> 8
}

// This is a standard CRC32 like in hash/crc32 except that all the shifts are reversed,
// causing the bits in the input to be processed in the reverse of the usual order.

// crctab is the byte-indexed lookup table used by updateCRC; init fills it.
var crctab [256]uint32

// init builds crctab. Entry i is the result of pushing the byte i, placed in
// the top 8 bits, through eight most-significant-bit-first steps of the
// polynomial division.
func init() {
	const (
		poly   = 0x04C11DB7
		topBit = 0x80000000
	)
	for i := range crctab {
		v := uint32(i) << 24
		for step := 8; step > 0; step-- {
			carry := v&topBit != 0
			v <<= 1
			if carry {
				v ^= poly
			}
		}
		crctab[i] = v
	}
}

// updateCRC returns the checksum obtained by extending val with the bytes of
// b. Pass 0 as val to start a new checksum; passing a previous result
// continues it, so data may be fed in pieces.
func updateCRC(val uint32, b []byte) uint32 {
	// The register is kept inverted between calls.
	crc := ^val
	for i := 0; i < len(b); i++ {
		idx := byte(crc>>24) ^ b[i]
		crc = (crc << 8) ^ crctab[idx]
	}
	return ^crc
}
add gcc 4.70 2012-03-27 23:13:14 +00:00			`// Copyright 2011 The Go Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style`
			`// license that can be found in the LICENSE file.`

			`// Package bzip2 implements bzip2 decompression.`
			`package bzip2`

			`import "io"`

			`// There's no RFC for bzip2. I used the Wikipedia page for reference and a lot`
			`// of guessing: http://en.wikipedia.org/wiki/Bzip2`
			`// The source code to pyflate was useful for debugging:`
			`// http://www.paul.sladen.org/projects/pyflate`

			`// A StructuralError is returned when the bzip2 data is found to be`
			`// syntactically invalid.`
			`type StructuralError string`

			`func (s StructuralError) Error() string {`
			`return "bzip2 data invalid: " + string(s)`
			`}`

			`// A reader decompresses bzip2 compressed data.`
			`type reader struct {`
upgrade to gcc 4.9.1 2014-09-21 17:33:12 +00:00			`br bitReader`
			`fileCRC uint32`
			`blockCRC uint32`
			`wantBlockCRC uint32`
			`setupDone bool // true if we have parsed the bzip2 header.`
			`blockSize int // blockSize in bytes, i.e. 900 * 1024.`
			`eof bool`
			`buf []byte // stores Burrows-Wheeler transformed data.`
			c [256]uint // the `C' array for the inverse BWT.
			tt []uint32 // mirrors the `tt' array in the bzip2 source and contains the P array in the upper 24 bits.
			`tPos uint32 // Index of the next output byte in tt.`
add gcc 4.70 2012-03-27 23:13:14 +00:00
			`preRLE []uint32 // contains the RLE data still to be processed.`
			`preRLEUsed int // number of entries of preRLE used.`
			`lastByte int // the last byte value seen.`
			`byteRepeats uint // the number of repeats of lastByte seen.`
			`repeats uint // the number of copies of lastByte to output.`
			`}`

			`// NewReader returns an io.Reader which decompresses bzip2 data from r.`
Update gcc to 5.2.0 2015-08-28 15:33:40 +00:00			`// If r does not also implement io.ByteReader,`
			`// the decompressor may read more data than necessary from r.`
add gcc 4.70 2012-03-27 23:13:14 +00:00			`func NewReader(r io.Reader) io.Reader {`
			`bz2 := new(reader)`
			`bz2.br = newBitReader(r)`
			`return bz2`
			`}`

			`const bzip2FileMagic = 0x425a // "BZ"`
			`const bzip2BlockMagic = 0x314159265359`
			`const bzip2FinalMagic = 0x177245385090`

			`// setup parses the bzip2 header.`
upgrade to gcc 4.9.1 2014-09-21 17:33:12 +00:00			`func (bz2 *reader) setup(needMagic bool) error {`
add gcc 4.70 2012-03-27 23:13:14 +00:00			`br := &bz2.br`

upgrade to gcc 4.9.1 2014-09-21 17:33:12 +00:00			`if needMagic {`
			`magic := br.ReadBits(16)`
			`if magic != bzip2FileMagic {`
			`return StructuralError("bad magic value")`
			`}`
add gcc 4.70 2012-03-27 23:13:14 +00:00			`}`

			`t := br.ReadBits(8)`
			`if t != 'h' {`
			`return StructuralError("non-Huffman entropy encoding")`
			`}`

			`level := br.ReadBits(8)`
			`if level < '1' \|\| level > '9' {`
			`return StructuralError("invalid compression level")`
			`}`

upgrade to gcc 4.9.1 2014-09-21 17:33:12 +00:00			`bz2.fileCRC = 0`
add gcc 4.70 2012-03-27 23:13:14 +00:00			`bz2.blockSize = 100 * 1024 * (int(level) - '0')`
upgrade to gcc 4.9.1 2014-09-21 17:33:12 +00:00			`if bz2.blockSize > len(bz2.tt) {`
			`bz2.tt = make([]uint32, bz2.blockSize)`
			`}`
add gcc 4.70 2012-03-27 23:13:14 +00:00			`return nil`
			`}`

			`func (bz2 *reader) Read(buf []byte) (n int, err error) {`
			`if bz2.eof {`
			`return 0, io.EOF`
			`}`

			`if !bz2.setupDone {`
upgrade to gcc 4.9.1 2014-09-21 17:33:12 +00:00			`err = bz2.setup(true)`
add gcc 4.70 2012-03-27 23:13:14 +00:00			`brErr := bz2.br.Err()`
			`if brErr != nil {`
			`err = brErr`
			`}`
			`if err != nil {`
			`return 0, err`
			`}`
			`bz2.setupDone = true`
			`}`

			`n, err = bz2.read(buf)`
			`brErr := bz2.br.Err()`
			`if brErr != nil {`
			`err = brErr`
			`}`
			`return`
			`}`

upgrade to gcc 4.9.1 2014-09-21 17:33:12 +00:00			`func (bz2 *reader) readFromBlock(buf []byte) int {`
add gcc 4.70 2012-03-27 23:13:14 +00:00			`// bzip2 is a block based compressor, except that it has a run-length`
			`// preprocessing step. The block based nature means that we can`
			`// preallocate fixed-size buffers and reuse them. However, the RLE`
			`// preprocessing would require allocating huge buffers to store the`
			`// maximum expansion. Thus we process blocks all at once, except for`
			`// the RLE which we decompress as required.`
upgrade to gcc 4.9.1 2014-09-21 17:33:12 +00:00			`n := 0`
add gcc 4.70 2012-03-27 23:13:14 +00:00			`for (bz2.repeats > 0 \|\| bz2.preRLEUsed < len(bz2.preRLE)) && n < len(buf) {`
			`// We have RLE data pending.`

			`// The run-length encoding works like this:`
			`// Any sequence of four equal bytes is followed by a length`
			`// byte which contains the number of repeats of that byte to`
			`// include. (The number of repeats can be zero.) Because we are`
			`// decompressing on-demand our state is kept in the reader`
			`// object.`

			`if bz2.repeats > 0 {`
			`buf[n] = byte(bz2.lastByte)`
			`n++`
			`bz2.repeats--`
			`if bz2.repeats == 0 {`
			`bz2.lastByte = -1`
			`}`
			`continue`
			`}`

			`bz2.tPos = bz2.preRLE[bz2.tPos]`
			`b := byte(bz2.tPos)`
			`bz2.tPos >>= 8`
			`bz2.preRLEUsed++`

			`if bz2.byteRepeats == 3 {`
			`bz2.repeats = uint(b)`
			`bz2.byteRepeats = 0`
			`continue`
			`}`

			`if bz2.lastByte == int(b) {`
			`bz2.byteRepeats++`
			`} else {`
			`bz2.byteRepeats = 0`
			`}`
			`bz2.lastByte = int(b)`

			`buf[n] = b`
			`n++`
			`}`

upgrade to gcc 4.9.1 2014-09-21 17:33:12 +00:00			`return n`
			`}`
add gcc 4.70 2012-03-27 23:13:14 +00:00
upgrade to gcc 4.9.1 2014-09-21 17:33:12 +00:00			`func (bz2 *reader) read(buf []byte) (int, error) {`
			`for {`
			`n := bz2.readFromBlock(buf)`
			`if n > 0 {`
			`bz2.blockCRC = updateCRC(bz2.blockCRC, buf[:n])`
			`return n, nil`
			`}`
add gcc 4.70 2012-03-27 23:13:14 +00:00
upgrade to gcc 4.9.1 2014-09-21 17:33:12 +00:00			`// End of block. Check CRC.`
			`if bz2.blockCRC != bz2.wantBlockCRC {`
			`bz2.br.err = StructuralError("block checksum mismatch")`
			`return 0, bz2.br.err`
			`}`
add gcc 4.70 2012-03-27 23:13:14 +00:00
upgrade to gcc 4.9.1 2014-09-21 17:33:12 +00:00			`// Find next block.`
			`br := &bz2.br`
			`switch br.ReadBits64(48) {`
			`default:`
			`return 0, StructuralError("bad magic value found")`

			`case bzip2BlockMagic:`
			`// Start of block.`
			`err := bz2.readBlock()`
			`if err != nil {`
			`return 0, err`
			`}`
add gcc 4.70 2012-03-27 23:13:14 +00:00
upgrade to gcc 4.9.1 2014-09-21 17:33:12 +00:00			`case bzip2FinalMagic:`
			`// Check end-of-file CRC.`
			`wantFileCRC := uint32(br.ReadBits64(32))`
			`if br.err != nil {`
			`return 0, br.err`
			`}`
			`if bz2.fileCRC != wantFileCRC {`
			`br.err = StructuralError("file checksum mismatch")`
			`return 0, br.err`
			`}`

			`// Skip ahead to byte boundary.`
			`// Is there a file concatenated to this one?`
			`// It would start with BZ.`
			`if br.bits%8 != 0 {`
			`br.ReadBits(br.bits % 8)`
			`}`
			`b, err := br.r.ReadByte()`
			`if err == io.EOF {`
			`br.err = io.EOF`
			`bz2.eof = true`
			`return 0, io.EOF`
			`}`
			`if err != nil {`
			`br.err = err`
			`return 0, err`
			`}`
			`z, err := br.r.ReadByte()`
			`if err != nil {`
			`if err == io.EOF {`
			`err = io.ErrUnexpectedEOF`
			`}`
			`br.err = err`
			`return 0, err`
			`}`
			`if b != 'B' \|\| z != 'Z' {`
			`return 0, StructuralError("bad magic value in continuation file")`
			`}`
			`if err := bz2.setup(false); err != nil {`
			`return 0, err`
			`}`
			`}`
			`}`
add gcc 4.70 2012-03-27 23:13:14 +00:00			`}`

			`// readBlock reads a bzip2 block. The magic number should already have been consumed.`
			`func (bz2 *reader) readBlock() (err error) {`
			`br := &bz2.br`
upgrade to gcc 4.9.1 2014-09-21 17:33:12 +00:00			`bz2.wantBlockCRC = uint32(br.ReadBits64(32)) // skip checksum. TODO: check it if we can figure out what it is.`
			`bz2.blockCRC = 0`
			`bz2.fileCRC = (bz2.fileCRC<<1 \| bz2.fileCRC>>31) ^ bz2.wantBlockCRC`
add gcc 4.70 2012-03-27 23:13:14 +00:00			`randomized := br.ReadBits(1)`
			`if randomized != 0 {`
			`return StructuralError("deprecated randomized files")`
			`}`
			`origPtr := uint(br.ReadBits(24))`

			`// If not every byte value is used in the block (i.e., it's text) then`
			`// the symbol set is reduced. The symbols used are stored as a`
			`// two-level, 16x16 bitmap.`
			`symbolRangeUsedBitmap := br.ReadBits(16)`
			`symbolPresent := make([]bool, 256)`
			`numSymbols := 0`
			`for symRange := uint(0); symRange < 16; symRange++ {`
			`if symbolRangeUsedBitmap&(1<<(15-symRange)) != 0 {`
			`bits := br.ReadBits(16)`
			`for symbol := uint(0); symbol < 16; symbol++ {`
			`if bits&(1<<(15-symbol)) != 0 {`
			`symbolPresent[16*symRange+symbol] = true`
			`numSymbols++`
			`}`
			`}`
			`}`
			`}`

Update gcc to 5.2.0 2015-08-28 15:33:40 +00:00			`if numSymbols == 0 {`
			`// There must be an EOF symbol.`
			`return StructuralError("no symbols in input")`
			`}`

add gcc 4.70 2012-03-27 23:13:14 +00:00			`// A block uses between two and six different Huffman trees.`
			`numHuffmanTrees := br.ReadBits(3)`
			`if numHuffmanTrees < 2 \|\| numHuffmanTrees > 6 {`
			`return StructuralError("invalid number of Huffman trees")`
			`}`

			`// The Huffman tree can switch every 50 symbols so there's a list of`
			`// tree indexes telling us which tree to use for each 50 symbol block.`
			`numSelectors := br.ReadBits(15)`
			`treeIndexes := make([]uint8, numSelectors)`

			`// The tree indexes are move-to-front transformed and stored as unary`
			`// numbers.`
			`mtfTreeDecoder := newMTFDecoderWithRange(numHuffmanTrees)`
			`for i := range treeIndexes {`
			`c := 0`
			`for {`
			`inc := br.ReadBits(1)`
			`if inc == 0 {`
			`break`
			`}`
			`c++`
			`}`
			`if c >= numHuffmanTrees {`
			`return StructuralError("tree index too large")`
			`}`
			`treeIndexes[i] = uint8(mtfTreeDecoder.Decode(c))`
			`}`

			`// The list of symbols for the move-to-front transform is taken from`
			`// the previously decoded symbol bitmap.`
			`symbols := make([]byte, numSymbols)`
			`nextSymbol := 0`
			`for i := 0; i < 256; i++ {`
			`if symbolPresent[i] {`
			`symbols[nextSymbol] = byte(i)`
			`nextSymbol++`
			`}`
			`}`
			`mtf := newMTFDecoder(symbols)`

			`numSymbols += 2 // to account for RUNA and RUNB symbols`
			`huffmanTrees := make([]huffmanTree, numHuffmanTrees)`

			`// Now we decode the arrays of code-lengths for each tree.`
			`lengths := make([]uint8, numSymbols)`
Update gcc to 5.2.0 2015-08-28 15:33:40 +00:00			`for i := range huffmanTrees {`
add gcc 4.70 2012-03-27 23:13:14 +00:00			`// The code lengths are delta encoded from a 5-bit base value.`
			`length := br.ReadBits(5)`
Update gcc to 5.2.0 2015-08-28 15:33:40 +00:00			`for j := range lengths {`
add gcc 4.70 2012-03-27 23:13:14 +00:00			`for {`
			`if !br.ReadBit() {`
			`break`
			`}`
			`if br.ReadBit() {`
			`length--`
			`} else {`
			`length++`
			`}`
			`}`
			`if length < 0 \|\| length > 20 {`
			`return StructuralError("Huffman length out of range")`
			`}`
			`lengths[j] = uint8(length)`
			`}`
			`huffmanTrees[i], err = newHuffmanTree(lengths)`
			`if err != nil {`
			`return err`
			`}`
			`}`

			`selectorIndex := 1 // the next tree index to use`
Update gcc to 5.2.0 2015-08-28 15:33:40 +00:00			`if len(treeIndexes) == 0 {`
			`return StructuralError("no tree selectors given")`
			`}`
			`if int(treeIndexes[0]) >= len(huffmanTrees) {`
			`return StructuralError("tree selector out of range")`
			`}`
add gcc 4.70 2012-03-27 23:13:14 +00:00			`currentHuffmanTree := huffmanTrees[treeIndexes[0]]`
			`bufIndex := 0 // indexes bz2.buf, the output buffer.`
			`// The output of the move-to-front transform is run-length encoded and`
			`// we merge the decoding into the Huffman parsing loop. These two`
			`// variables accumulate the repeat count. See the Wikipedia page for`
			`// details.`
			`repeat := 0`
			`repeat_power := 0`

			// The `C' array (used by the inverse BWT) needs to be zero initialized.
			`for i := range bz2.c {`
			`bz2.c[i] = 0`
			`}`

			`decoded := 0 // counts the number of symbols decoded by the current tree.`
			`for {`
			`if decoded == 50 {`
Update gcc to 5.2.0 2015-08-28 15:33:40 +00:00			`if selectorIndex >= numSelectors {`
			`return StructuralError("insufficient selector indices for number of symbols")`
			`}`
			`if int(treeIndexes[selectorIndex]) >= len(huffmanTrees) {`
			`return StructuralError("tree selector out of range")`
			`}`
add gcc 4.70 2012-03-27 23:13:14 +00:00			`currentHuffmanTree = huffmanTrees[treeIndexes[selectorIndex]]`
			`selectorIndex++`
			`decoded = 0`
			`}`

			`v := currentHuffmanTree.Decode(br)`
			`decoded++`

			`if v < 2 {`
			`// This is either the RUNA or RUNB symbol.`
			`if repeat == 0 {`
			`repeat_power = 1`
			`}`
			`repeat += repeat_power << v`
			`repeat_power <<= 1`

			`// This limit of 2 million comes from the bzip2 source`
			`// code. It prevents repeat from overflowing.`
			`if repeat > 2*1024*1024 {`
			`return StructuralError("repeat count too large")`
			`}`
			`continue`
			`}`

			`if repeat > 0 {`
			`// We have decoded a complete run-length so we need to`
			`// replicate the last output symbol.`
upgrade to gcc 4.9.1 2014-09-21 17:33:12 +00:00			`if repeat > bz2.blockSize-bufIndex {`
			`return StructuralError("repeats past end of block")`
			`}`
add gcc 4.70 2012-03-27 23:13:14 +00:00			`for i := 0; i < repeat; i++ {`
			`b := byte(mtf.First())`
			`bz2.tt[bufIndex] = uint32(b)`
			`bz2.c[b]++`
			`bufIndex++`
			`}`
			`repeat = 0`
			`}`

			`if int(v) == numSymbols-1 {`
			`// This is the EOF symbol. Because it's always at the`
			`// end of the move-to-front list, and never gets moved`
			`// to the front, it has this unique value.`
			`break`
			`}`

			`// Since two metasymbols (RUNA and RUNB) have values 0 and 1,`
			`// one would expect \|v-2\| to be passed to the MTF decoder.`
			`// However, the front of the MTF list is never referenced as 0,`
			`// it's always referenced with a run-length of 1. Thus 0`
			`// doesn't need to be encoded and we have \|v-1\| in the next`
			`// line.`
			`b := byte(mtf.Decode(int(v - 1)))`
upgrade to gcc 4.9.1 2014-09-21 17:33:12 +00:00			`if bufIndex >= bz2.blockSize {`
			`return StructuralError("data exceeds block size")`
			`}`
add gcc 4.70 2012-03-27 23:13:14 +00:00			`bz2.tt[bufIndex] = uint32(b)`
			`bz2.c[b]++`
			`bufIndex++`
			`}`

			`if origPtr >= uint(bufIndex) {`
			`return StructuralError("origPtr out of bounds")`
			`}`

			`// We have completed the entropy decoding. Now we can perform the`
			`// inverse BWT and setup the RLE buffer.`
			`bz2.preRLE = bz2.tt[:bufIndex]`
			`bz2.preRLEUsed = 0`
			`bz2.tPos = inverseBWT(bz2.preRLE, origPtr, bz2.c[:])`
			`bz2.lastByte = -1`
			`bz2.byteRepeats = 0`
			`bz2.repeats = 0`

			`return nil`
			`}`

			`// inverseBWT implements the inverse Burrows-Wheeler transform as described in`
			`// http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf, section 4.2.`
			// In that document, origPtr is called `I' and c is the `C' array after the
			`// first pass over the data. It's an argument here because we merge the first`
			`// pass with the Huffman decoding.`
			`//`
			// This also implements the `single array' method from the bzip2 source code
			`// which leaves the output, still shuffled, in the bottom 8 bits of tt with the`
			`// index of the next byte in the top 24-bits. The index of the first byte is`
			`// returned.`
			`func inverseBWT(tt []uint32, origPtr uint, c []uint) uint32 {`
			`sum := uint(0)`
			`for i := 0; i < 256; i++ {`
			`sum += c[i]`
			`c[i] = sum - c[i]`
			`}`

			`for i := range tt {`
			`b := tt[i] & 0xff`
			`tt[c[b]] \|= uint32(i) << 8`
			`c[b]++`
			`}`

			`return tt[origPtr] >> 8`
			`}`
upgrade to gcc 4.9.1 2014-09-21 17:33:12 +00:00
			`// This is a standard CRC32 like in hash/crc32 except that all the shifts are reversed,`
			`// causing the bits in the input to be processed in the reverse of the usual order.`

			`var crctab [256]uint32`

			`func init() {`
			`const poly = 0x04C11DB7`
			`for i := range crctab {`
			`crc := uint32(i) << 24`
			`for j := 0; j < 8; j++ {`
			`if crc&0x80000000 != 0 {`
			`crc = (crc << 1) ^ poly`
			`} else {`
			`crc <<= 1`
			`}`
			`}`
			`crctab[i] = crc`
			`}`
			`}`

			`// updateCRC updates the crc value to incorporate the data in b.`
			`// The initial value is 0.`
			`func updateCRC(val uint32, b []byte) uint32 {`
			`crc := ^val`
			`for _, v := range b {`
			`crc = crctab[byte(crc>>24)^v] ^ (crc << 8)`
			`}`
			`return ^crc`
			`}`