GNO port source code (from 2003)

This commit is contained in:
Stephen Heumann 2015-10-10 14:36:21 -05:00
parent 18bf65b3db
commit 160de28119
56 changed files with 1396 additions and 14474 deletions

220
Makefile
View File

@ -1,192 +1,94 @@
# Makefile for bunzip2 for GNO (for use with dmake)
# Based on Unix Makefile for bzip2
# Modified for GNO by Stephen Heumann
SHELL=/bin/sh
# ORCA/C 2.1.0 may need more than 8 megabytes of RAM to compile decompress.c
# with full optimization enabled. Thus, this makefile can only
# be used as is on an emulated system with 14 megabyte RAM support.
# To assist in cross-compiling
CC=gcc
AR=ar
RANLIB=ranlib
# Uncomment this if make doesn't have the $CC variable set appropriately
# CC=occ
RM=cp -p rm
LDFLAGS=
# Suitably paranoid flags to avoid bugs in gcc-2.7
BIGFILES=-D_FILE_OFFSET_BITS=64
CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES)
# The "-I /usr/include" shouldn't be needed but seemed to fix problems for me
CFLAGS=-a0 -w -O -I /usr/include
NOROOTFLAG=-r
# Where you want it installed when you do 'make install'
PREFIX=/usr
PREFIX=/usr/local
OBJS= blocksort.o \
OBJS= stristr.o \
huffman.o \
crctable.o \
randtable.o \
compress.o \
decompress.o \
bzlib.o
all: libbz2.a bzip2 bzip2recover test
all: bunzip2 bzip2recover test
bzip2: libbz2.a bzip2.o
$(CC) $(CFLAGS) $(LDFLAGS) -o bzip2 bzip2.o -L. -lbz2
bunzip2: bzip2.o $(OBJS)
$(CC) -o bunzip2 bunzip2.rez
$(CC) $(CFLAGS) $(LDFLAGS) bzip2.o $(OBJS) -o bunzip2
bzip2recover: bzip2recover.o
$(CC) $(CFLAGS) $(LDFLAGS) -o bzip2recover bzip2recover.o
libbz2.a: $(OBJS)
rm -f libbz2.a
$(AR) cq libbz2.a $(OBJS)
@if ( test -f $(RANLIB) -o -f /usr/bin/ranlib -o \
-f /bin/ranlib -o -f /usr/ccs/bin/ranlib ) ; then \
echo $(RANLIB) libbz2.a ; \
$(RANLIB) libbz2.a ; \
fi
$(CC) -o bzip2recover bzip2recover.rez
$(CC) $(CFLAGS) $(LDFLAGS) bzip2recover.o -o bzip2recover
check: test
test: bzip2
test: bunzip2
@cat words1
./bzip2 -1 < sample1.ref > sample1.rb2
./bzip2 -2 < sample2.ref > sample2.rb2
./bzip2 -3 < sample3.ref > sample3.rb2
./bzip2 -d < sample1.bz2 > sample1.tst
./bzip2 -d < sample2.bz2 > sample2.tst
./bzip2 -ds < sample3.bz2 > sample3.tst
cmp sample1.bz2 sample1.rb2
cmp sample2.bz2 sample2.rb2
cmp sample3.bz2 sample3.rb2
./bunzip2 -dk < sample1.bz2 > sample1.tst
./bunzip2 -dk < sample2.bz2 > sample2.tst
./bunzip2 -dks < sample3.bz2 > sample3.tst
@cat words2
cmp sample1.tst sample1.ref
cmp sample2.tst sample2.ref
cmp sample3.tst sample3.ref
@cat words3
install: bzip2 bzip2recover
if ( test ! -d $(PREFIX)/bin ) ; then mkdir -p $(PREFIX)/bin ; fi
if ( test ! -d $(PREFIX)/lib ) ; then mkdir -p $(PREFIX)/lib ; fi
if ( test ! -d $(PREFIX)/man ) ; then mkdir -p $(PREFIX)/man ; fi
if ( test ! -d $(PREFIX)/man/man1 ) ; then mkdir -p $(PREFIX)/man/man1 ; fi
if ( test ! -d $(PREFIX)/include ) ; then mkdir -p $(PREFIX)/include ; fi
cp -f bzip2 $(PREFIX)/bin/bzip2
cp -f bzip2 $(PREFIX)/bin/bunzip2
cp -f bzip2 $(PREFIX)/bin/bzcat
install: bunzip2 bzip2recover test justinstall
justinstall:
# This should install bunzip2 for GNO under /usr/local
mkdir $(PREFIX)/bin >& .null
mkdir $(PREFIX)/man >& .null
mkdir $(PREFIX)/man/man1 >& .null
cp -f bunzip2 $(PREFIX)/bin/bunzip2
cp -f bzip2recover $(PREFIX)/bin/bzip2recover
chmod a+x $(PREFIX)/bin/bzip2
chmod a+x $(PREFIX)/bin/bunzip2
chmod a+x $(PREFIX)/bin/bzcat
chmod a+x $(PREFIX)/bin/bzip2recover
cp -f bzip2.1 $(PREFIX)/man/man1
chmod a+r $(PREFIX)/man/man1/bzip2.1
cp -f bzlib.h $(PREFIX)/include
chmod a+r $(PREFIX)/include/bzlib.h
cp -f libbz2.a $(PREFIX)/lib
chmod a+r $(PREFIX)/lib/libbz2.a
cp -f bzgrep $(PREFIX)/bin/bzgrep
ln $(PREFIX)/bin/bzgrep $(PREFIX)/bin/bzegrep
ln $(PREFIX)/bin/bzgrep $(PREFIX)/bin/bzfgrep
chmod a+x $(PREFIX)/bin/bzgrep
cp -f bzmore $(PREFIX)/bin/bzmore
ln $(PREFIX)/bin/bzmore $(PREFIX)/bin/bzless
chmod a+x $(PREFIX)/bin/bzmore
cp -f bzdiff $(PREFIX)/bin/bzdiff
ln $(PREFIX)/bin/bzdiff $(PREFIX)/bin/bzcmp
chmod a+x $(PREFIX)/bin/bzdiff
cp -f bzgrep.1 bzmore.1 bzdiff.1 $(PREFIX)/man/man1
chmod a+r $(PREFIX)/man/man1/bzgrep.1
chmod a+r $(PREFIX)/man/man1/bzmore.1
chmod a+r $(PREFIX)/man/man1/bzdiff.1
echo ".so man1/bzgrep.1" > $(PREFIX)/man/man1/bzegrep.1
echo ".so man1/bzgrep.1" > $(PREFIX)/man/man1/bzfgrep.1
echo ".so man1/bzmore.1" > $(PREFIX)/man/man1/bzless.1
echo ".so man1/bzdiff.1" > $(PREFIX)/man/man1/bzcmp.1
cp -f bunzip2.1 $(PREFIX)/man/man1/bunzip2.1
cp -f bzip2recover.1 $(PREFIX)/man/man1/bzip2recover.1
cp -f bzcat.1 $(PREFIX)/man/man1/bzcat.1
@cat words4
distclean: clean
clean:
rm -f *.o libbz2.a bzip2 bzip2recover \
sample1.rb2 sample2.rb2 sample3.rb2 \
clean:
$(RM) -f *.o *.a *.sym *.root bunzip2 bzip2recover \
sample1.tst sample2.tst sample3.tst
blocksort.o: blocksort.c
@cat words0
$(CC) $(CFLAGS) -c blocksort.c
huffman.o: huffman.c
$(CC) $(CFLAGS) -c huffman.c
crctable.o: crctable.c
$(CC) $(CFLAGS) -c crctable.c
randtable.o: randtable.c
$(CC) $(CFLAGS) -c randtable.c
compress.o: compress.c
$(CC) $(CFLAGS) -c compress.c
decompress.o: decompress.c
$(CC) $(CFLAGS) -c decompress.c
bzlib.o: bzlib.c
$(CC) $(CFLAGS) -c bzlib.c
bzip2.o: bzip2.c
$(CC) $(CFLAGS) -c bzip2.c
stristr.o: stristr.c
$(CC) $(CFLAGS) $(NOROOTFLAG) -c stristr.c
huffman.o: huffman.c bzlib_private.h
$(CC) $(CFLAGS) $(NOROOTFLAG) -c huffman.c
crctable.o: crctable.c bzlib_private.h
$(CC) $(CFLAGS) $(NOROOTFLAG) -c crctable.c
randtable.o: randtable.c bzlib_private.h
$(CC) $(CFLAGS) $(NOROOTFLAG) -c randtable.c
decompress.o: decompress.c bzlib_private.h
$(CC) $(CFLAGS) $(NOROOTFLAG) -c decompress.c
bzlib.o: bzlib.c bzlib_private.h
$(CC) $(CFLAGS) $(NOROOTFLAG) -c bzlib.c
bzip2.o: bzip2.c bzlib.h
$(CC) $(CFLAGS) -s 2048 -C1 -c bzip2.c
# $(CC) $(CFLAGS) -C1 -D __STACK_CHECK__ -c bzip2.c
bzip2recover.o: bzip2recover.c
$(CC) $(CFLAGS) -c bzip2recover.c
$(CC) $(CFLAGS) -s 1024 -c bzip2recover.c
# $(CC) $(CFLAGS) -D __STACK_CHECK__ -c bzip2recover.c
bzlib_private.h: bzlib.h
DISTNAME=bzip2-1.0.2
tarfile:
rm -f $(DISTNAME)
ln -sf . $(DISTNAME)
tar cvf $(DISTNAME).tar \
$(DISTNAME)/blocksort.c \
$(DISTNAME)/huffman.c \
$(DISTNAME)/crctable.c \
$(DISTNAME)/randtable.c \
$(DISTNAME)/compress.c \
$(DISTNAME)/decompress.c \
$(DISTNAME)/bzlib.c \
$(DISTNAME)/bzip2.c \
$(DISTNAME)/bzip2recover.c \
$(DISTNAME)/bzlib.h \
$(DISTNAME)/bzlib_private.h \
$(DISTNAME)/Makefile \
$(DISTNAME)/manual.texi \
$(DISTNAME)/manual.ps \
$(DISTNAME)/manual.pdf \
$(DISTNAME)/LICENSE \
$(DISTNAME)/bzip2.1 \
$(DISTNAME)/bzip2.1.preformatted \
$(DISTNAME)/bzip2.txt \
$(DISTNAME)/words0 \
$(DISTNAME)/words1 \
$(DISTNAME)/words2 \
$(DISTNAME)/words3 \
$(DISTNAME)/sample1.ref \
$(DISTNAME)/sample2.ref \
$(DISTNAME)/sample3.ref \
$(DISTNAME)/sample1.bz2 \
$(DISTNAME)/sample2.bz2 \
$(DISTNAME)/sample3.bz2 \
$(DISTNAME)/dlltest.c \
$(DISTNAME)/*.html \
$(DISTNAME)/README \
$(DISTNAME)/README.COMPILATION.PROBLEMS \
$(DISTNAME)/CHANGES \
$(DISTNAME)/libbz2.def \
$(DISTNAME)/libbz2.dsp \
$(DISTNAME)/dlltest.dsp \
$(DISTNAME)/makefile.msc \
$(DISTNAME)/Y2K_INFO \
$(DISTNAME)/unzcrash.c \
$(DISTNAME)/spewG.c \
$(DISTNAME)/mk251.c \
$(DISTNAME)/bzdiff \
$(DISTNAME)/bzdiff.1 \
$(DISTNAME)/bzmore \
$(DISTNAME)/bzmore.1 \
$(DISTNAME)/bzgrep \
$(DISTNAME)/bzgrep.1 \
$(DISTNAME)/Makefile-libbz2_so
gzip -v $(DISTNAME).tar
# For rebuilding the manual from sources on my RedHat 7.2 box
manual: manual.ps manual.pdf manual.html
manual.ps: manual.texi
tex manual.texi
dvips -o manual.ps manual.dvi
manual.pdf: manual.ps
ps2pdf manual.ps
manual.html: manual.texi
texi2html -split_chapter manual.texi
chtyp:
chtyp -l cc *.c *.h

View File

@ -1,44 +0,0 @@
# This Makefile builds a shared version of the library,
# libbz2.so.1.0.2, with soname libbz2.so.1.0,
# at least on x86-Linux (RedHat 7.2),
# with gcc-2.96 20000731 (Red Hat Linux 7.1 2.96-98).
# Please see the README file for some
# important info about building the library like this.
SHELL=/bin/sh
CC=gcc
BIGFILES=-D_FILE_OFFSET_BITS=64
CFLAGS=-fpic -fPIC -Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES)
OBJS= blocksort.o \
huffman.o \
crctable.o \
randtable.o \
compress.o \
decompress.o \
bzlib.o
# Default target: link the object files into the shared library
# libbz2.so.1.0.2 (soname libbz2.so.1.0), build bzip2-shared from bzip2.c
# linked against that library, then recreate the libbz2.so.1.0 symlink
# pointing at libbz2.so.1.0.2.
all: $(OBJS)
$(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.2 $(OBJS)
$(CC) $(CFLAGS) -o bzip2-shared bzip2.c libbz2.so.1.0.2
rm -f libbz2.so.1.0
ln -s libbz2.so.1.0.2 libbz2.so.1.0
clean:
rm -f $(OBJS) bzip2.o libbz2.so.1.0.2 libbz2.so.1.0 bzip2-shared
blocksort.o: blocksort.c
$(CC) $(CFLAGS) -c blocksort.c
huffman.o: huffman.c
$(CC) $(CFLAGS) -c huffman.c
crctable.o: crctable.c
$(CC) $(CFLAGS) -c crctable.c
randtable.o: randtable.c
$(CC) $(CFLAGS) -c randtable.c
compress.o: compress.c
$(CC) $(CFLAGS) -c compress.c
decompress.o: decompress.c
$(CC) $(CFLAGS) -c decompress.c
bzlib.o: bzlib.c
$(CC) $(CFLAGS) -c bzlib.c

View File

@ -1,130 +0,0 @@
bzip2-1.0 should compile without problems on the vast majority of
platforms. Using the supplied Makefile, I've built and tested it
myself for x86-linux, sparc-solaris, alpha-linux, x86-cygwin32 and
alpha-tru64unix. With makefile.msc, Visual C++ 6.0 and nmake, you can
build a native Win32 version too. Large file support seems to work
correctly on at least alpha-tru64unix and x86-cygwin32 (on Windows
2000).
When I say "large file" I mean a file of size 2,147,483,648 (2^31)
bytes or above. Many older OSs can't handle files above this size,
but many newer ones can. Large files are pretty huge -- most files
you'll encounter are not Large Files.
Earlier versions of bzip2 (0.1, 0.9.0, 0.9.5) compiled on a wide
variety of platforms without difficulty, and I hope this version will
continue in that tradition. However, in order to support large files,
I've had to include the define -D_FILE_OFFSET_BITS=64 in the Makefile.
This can cause problems.
The technique of adding -D_FILE_OFFSET_BITS=64 to get large file
support is, as far as I know, the Recommended Way to get correct large
file support. For more details, see the Large File Support
Specification, published by the Large File Summit, at
http://www.sas.com/standard/large.file/
As a general comment, if you get compilation errors which you think
are related to large file support, try removing the above define from
the Makefile, ie, delete the line
BIGFILES=-D_FILE_OFFSET_BITS=64
from the Makefile, and do 'make clean ; make'. This will give you a
version of bzip2 without large file support, which, for most
applications, is probably not a problem.
Alternatively, try some of the platform-specific hints listed below.
You can use the spewG.c program to generate huge files to test bzip2's
large file support, if you are feeling paranoid. Be aware though that
any compilation problems which affect bzip2 will also affect spewG.c,
alas.
Known problems as of 1.0pre8:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* HP/UX 10.20 and 11.00, using gcc (2.7.2.3 and 2.95.2): A large
number of warnings appear, including the following:
/usr/include/sys/resource.h: In function `getrlimit':
/usr/include/sys/resource.h:168:
warning: implicit declaration of function `__getrlimit64'
/usr/include/sys/resource.h: In function `setrlimit':
/usr/include/sys/resource.h:170:
warning: implicit declaration of function `__setrlimit64'
This would appear to be a problem with large file support, header
files and gcc. gcc may or may not give up at this point. If it
fails, you might be able to improve matters by adding
-D__STDC_EXT__=1
to the BIGFILES variable in the Makefile (ie, change its definition
to
BIGFILES=-D_FILE_OFFSET_BITS=64 -D__STDC_EXT__=1
Even if gcc does produce a binary which appears to work (ie passes
its self-tests), you might want to test it to see if it works properly
on large files.
* HP/UX 10.20 and 11.00, using HP's cc compiler.
No specific problems for this combination, except that you'll need to
specify the -Ae flag, and zap the gcc-specific stuff
-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce.
You should retain -D_FILE_OFFSET_BITS=64 in order to get large
file support -- which is reported to work ok for this HP/UX + cc
combination.
* SunOS 4.1.X.
Amazingly, there are still people out there using this venerable old
banger. I shouldn't be too rude -- I started life on SunOS, and
it was a pretty darn good OS, way back then. Anyway:
SunOS doesn't seem to have strerror(), so you'll have to use
perror(), perhaps by adding this (warning: UNTESTED CODE):
char* strerror ( int errnum )
{
if (errnum < 0 || errnum >= sys_nerr)
return "Unknown error";
else
return sys_errlist[errnum];
}
Or you could comment out the relevant calls to strerror; they're
not mission-critical. Or you could upgrade to Solaris. Ha ha ha!
(what?? you think I've got Bad Attitude?)
* Making a shared library on Solaris. (Not really a compilation
problem, but many people ask ...)
Firstly, if you have Solaris 8, either you have libbz2.so already
on your system, or you can install it from the Solaris CD.
Secondly, be aware that there are potential naming conflicts
between the .so file supplied with Solaris 8, and the .so file
which Makefile-libbz2_so will make. Makefile-libbz2_so creates
a .so which has the names which I intend to be "official" as
of version 1.0.0 and onwards. Unfortunately, the .so in
Solaris 8 appeared before I decided on the final names, so
the two libraries are incompatible. We have since communicated
and I hope that the problems will have been solved in the next
version of Solaris, whenever that might appear.
All that said: you might be able to get somewhere
by finding the line in Makefile-libbz2_so which says
$(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.2 $(OBJS)
and replacing with
$(CC) -G -shared -o libbz2.so.1.0.2 -h libbz2.so.1.0 $(OBJS)
If gcc objects to the combination -fpic -fPIC, get rid of
the second one, leaving just "-fpic".
That's the end of the currently known compilation problems.

158
README.GNO Normal file
View File

@ -0,0 +1,158 @@
README FOR BUNZIP2 FOR GNO
==========================
This is a port of the bunzip2 archive decompression program to the GNO
environment on the Apple IIgs. It is based on Julian Seward's original
bzip2 program, but it includes only the decompression (and testing)
functionality; compression is disabled. This archive also includes the
bzip2recover program, which may allow you to recover some data from a
partially corrupted bzip2 archive file. These programs correspond to
Julian Seward's bzip2 version 1.0.2.
REQUIREMENTS
============
Bunzip2 requires a ROM 01 or ROM 3 Apple IIgs (or an emulator thereof)
running IIgs System Software 6.0.1 and GNO 2.0.6 (or later).
Bunzip2 also needs considerable memory. It will not be able to decompress
most archives if you have less than 4 megabytes of RAM. On 4-5 MB
systems, you will likely have to specify the -s option to minimize memory
usage; on an 8MB (or 14MB) system, this will probably not be necessary,
unless you have a very large number of system extensions or other programs
running under GNO. See the manpage for more details on memory usage.
If bunzip2 gives you an out-of-memory error the first time you run it, try
again. The first attempt may have caused the system to reorganize memory
and purge unneeded data, freeing up enough space to run bunzip2 on the
second attempt.
Bunzip2 will also benefit from an accelerator, although one is obviously
not required. Even with an accelerator, it can be rather slow when
decompressing larger archives. Be prepared to wait a very long time
(several hours or even longer) for bunzip2 to finish decompressing large
bzip2 archives.
INSTALLATION
============
To install bunzip2, simply run "dmake justinstall". Alternatively, you can
install it manually: copy the bunzip2 and bzip2recover programs to your GNO
installation's /usr/local/bin directory, and copy the bunzip2.1, bzcat.1,
and bzip2recover.1 manpages to the /usr/local/man/man1 directory.
After installing bunzip2, you should read the manpage for directions on how
to use it. You can put the following line in your gshrc file so you can use
'bzcat' as documented in the manpage:
alias bzcat "bunzip2 -c"
NOTES ON THE SOURCE CODE
========================
[If you just want to use bunzip2, you do not need to read this section.]
Please note that a couple source files use non-ProDOS compatible filenames.
If you do not have an HFS or AppleShare partition available, these can
easily be changed to fit ProDOS conventions.
I had to make several changes to the bzip2 program when porting it to GNO.
The code is not very good-looking, but it does compile without warnings.
First, I disabled the compression functionality and set up the program to
decompress by default (and I renamed the binary to 'bunzip2' to reflect
this). The compression functionality is not very important on the GS, since
bzip2 is not a very good choice for compressing GS-specific data; ShrinkIt
will be much faster and preserves GS-specific file attributes. Even if you
want to create archives for use on UNIX-like systems, compress or gzip is
a better choice, and both are already available under GNO. For these
reasons, and because it reduced the amount of code that I had to modify, I
removed the compression functionality from bunzip2.
Other major changes to the code fell into several categories:
(1) Type sizes: Most of the code used defines for types such as Int32, making
it easy to adapt to the GS's 16-bit ints. The interface between the
bzip2 program and code designed to be compiled as 'libbzip2,' however,
assumes that int is 32 bits, so I had to modify it to use the appropriate
integer types on the GS. There were also silent assumptions in some
other areas that native ints are 32 bits, and I had to identify and
correct these. There were also variables specified as 'Int32' even
though 16 bits were sufficient to represent their possible range of
values; when I noticed these variables, I changed them appropriately.
(2) ORCA/C compiler limitations: ORCA/C in its 'small mode' (the only one
supported by the GNO libraries) places a 64k restriction on the size
of data structures that can be addressed as arrays. This is a problem
with bunzip2, which allocates and uses multi-megabyte data structures.
To work around this, I changed array-style references to these data
structures to use pointer arithmetic instead, working around the
limitation (e.g., I changed references to 'a[b]' to '*(a+b)'). I also
changed large local variables to be static or dynamically allocated
in order to avoid excessive stack usage.
(3) ORCA/C compiler bugs: In several cases ORCA/C 2.1.0 generated bad code
at the maximum optimization level. Most instances where reduced
optimization levels are used are necessary to work around bugs encountered
when using the disabled optimizations. Also, the size of the main
decompression function in decompress.c stresses ORCA/C. I modified
the GET_BITS macro to reduce the code size of the BZ2_decompress function
by making some of the code into a separate function. If this is not done
or if optimization is not enabled (increasing the compiled code size
as compared to when optimization is enabled), the compiler will crash,
give an error, or generate bad object code that gives linker errors.
(4) Modifications to work well with GNO and GS/OS: These include setting the
output filetype and disabling newline translation in GNO's stdio
implementation. I also set the stack sizes of the programs to
appropriate values and enabled stack checking for the small recursive
segment of the program (although it shouldn't actually pose any problem).
Additionally, I changed filename operations to be case-insensitive,
reflecting the case-insensitive nature of filesystems in the Apple IIgs.
I made most modifications conditional on the __appleiigs__, __ORCAC__, or
__GNO__ macros. Which macro I used gives some hint at the reason for each
modification, although all or none should be used to produce a working
executable (changes conditionalized on one macro may depend on those
conditionalized on another).
COMPILING
=========
The included Makefile can be used with dmake, occ, and ORCA/C 2.1.0, all of
which should be installed in your GNO 2.0.6 installation. You will also need
a copy of the lsaneglue library (which is missing from the default GNO 2.0.6
installation) to be present in your GNO /lib directory. Run 'dmake bunzip2'
to build the main program or 'dmake test' to build both programs and run a
simple test to ensure that bunzip2 is working correctly.
There are some special considerations necessary when compiling the file
decompress.c. As noted above, it must be compiled with (nearly) full
optimization to compile properly. To compile it with full optimization using
ORCA/C 2.1.0, however, requires more than 8MB of memory. Thus, decompress.c
(and by extension the bunzip2 program as a whole) can only be compiled on an
emulator with 14MB memory support enabled. The only emulators that presently
support this are Bernie ][ The Rescue and Sweet16. I have included a
prebuilt object file (decompress.o) so that you can rebuild bunzip2 with
changes to other source files using a real IIgs.
AREAS FOR IMPROVEMENT
=====================
* Resource forks and GS/OS filetypes are not supported. This is not a major
problem; other programs such as ShrinkIt should be used for GS-specific
archives.
* Compression could be reenabled. This would require adapting the compression
and block sorting routines to work properly under GNO on the GS.
* Some or all of the program could be rewritten in assembly language. This
would improve its performance by some amount, although I don't know how
much. It also might reduce memory usage. This would require a full
understanding of the BWT compression and decompression algorithms used in
bzip2, which I do not presently possess.
SUPPORT
=======
I can be contacted by email at sheumann@myrealbox.com . Please contact me,
rather than Julian Seward, about any problems that you are experiencing only
in the GNO version of bunzip2.
--
Stephen Heumann <sheumann@myrealbox.com>

View File

@ -1,34 +0,0 @@
Y2K status of bzip2 and libbzip2, versions 0.1, 0.9.0 and 0.9.5
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Informally speaking:
bzip2 is a compression program built on top of libbzip2,
a library which does the real work of compression and
decompression. As far as I am aware, libbzip2 does not have
any date-related code at all.
bzip2 itself copies dates from source to destination files
when compressing or decompressing, using the 'stat' and 'utime'
UNIX system calls. It doesn't examine, manipulate or store the
dates in any way. So as far as I can see, there shouldn't be any
problem with bzip2 providing 'stat' and 'utime' work correctly
on your system.
On non-unix platforms (those for which BZ_UNIX in bzip2.c is
not set to 1), bzip2 doesn't even do the date copying.
Overall, informally speaking, I don't think bzip2 or libbzip2
have a Y2K problem.
Formally speaking:
I am not prepared to offer you any assurance whatsoever
regarding Y2K issues in my software. You alone assume the
entire risk of using the software. The disclaimer of liability
in the LICENSE file in the bzip2 source distribution continues
to apply on this issue as with every other issue pertaining
to the software.
Julian Seward
Cambridge, UK
25 August 1999

File diff suppressed because it is too large Load Diff

362
bunzip2.1 Normal file
View File

@ -0,0 +1,362 @@
.TH BUNZIP2 1 "9 June 2003"
.SH NAME
bunzip2 \- a block-sorting file decompressor, v1.0.2gs1
.br
bzcat \- decompresses files to stdout
.br
bzip2recover \- recovers data from damaged bzip2 files
.SH SYNOPSIS
.br
.B bunzip2
.RB [ " \-fkvsVL " ]
[
.I "filenames \&..."
]
.br
.B bzcat
.RB [ " \-s " ]
[
.I "filenames \&..."
]
.br
.B bzip2recover
.I "filename"
.SH DESCRIPTION
.I bunzip2
decompresses files created by
.I bzip2
using the Burrows-Wheeler block sorting
text compression algorithm, and Huffman coding.
.I bzip2
generally achieves
considerably better compression than that achieved by more conventional
LZ77/LZ78-based compressors, and approaches the performance of the PPM
family of statistical compressors.
.LP
The command-line options are deliberately very similar to
those of
.I GNU
.I gunzip,
but they are not identical.
.LP
.I bunzip2
will by default not overwrite existing
files. If you want this to happen, specify the \-f flag.
.LP
.I bunzip2
decompresses all specified files. Files which were not created by
.I bzip2
will be detected and ignored, and a warning issued.
.I bunzip2
attempts to guess the filename for the decompressed file
from that of the compressed file as follows:
.LP
.nf
filename.bz2 becomes filename
filename.bz becomes filename
filename.tbz2 becomes filename.tar
filename.tbz becomes filename.tar
anyothername becomes anyothername.out
.fi
.LP
If the file does not end in one of the recognised endings,
.I .bz2,
.I .bz,
.I .tbz2
or
.I .tbz,
.I bunzip2
complains that it cannot
guess the name of the original file, and uses the original name
with
.I .out
appended.
.LP
Supplying no filenames causes decompression from
standard input to standard output.
.LP
File name handling is
naive in the sense that there is no mechanism for preserving original
file names, permissions, ownerships or dates in operating systems or
filesystems which lack these concepts, or have serious file name length
restrictions, such as MS-DOS or GS/OS.
.LP
.I bunzip2
will correctly decompress a file which is the
concatenation of two or more compressed files. The result is the
concatenation of the corresponding uncompressed files. Integrity
testing (\-t)
of concatenated
compressed files is also supported.
.LP
You can also decompress files to the standard output by
giving the \-c flag. Multiple files may be
decompressed like this. The resulting outputs are fed sequentially to stdout.
.LP
.I bzcat
(or
.I bunzip2
.I \-c)
decompresses all specified files to
the standard output.
.LP
.I bunzip2
will read arguments from the environment variables
.I BZIP2
and
.I BZIP,
in that order, and will process them
before any arguments read from the command line. This gives a
convenient way to supply default arguments.
.LP
As a self-check for your protection,
.I bzip2
and
.I bunzip2
use 32-bit CRCs to
make sure that the decompressed version of a file is identical to the
original. This guards against corruption of the compressed data, and
against undetected bugs in
.I bzip2
and
.I bunzip2
(hopefully very unlikely). The
chances of data corruption going undetected are microscopic, about one
chance in four billion for each file processed. Be aware, though, that
the check occurs upon decompression, so it can only tell you that
something is wrong. It can't help you
recover the original uncompressed
data. You can use
.I bzip2recover
to try to recover data from
damaged files.
.LP
This manual page pertains to version 1.0.2gs1 of
.I bunzip2.
It is fully compatible with compressed data created with all of the previous
public releases of bzip2, versions
0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, as well as version 1.0.2.
.LP
Return values: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt
compressed file, 3 for an internal consistency error (eg, bug) which
caused
.I bunzip2
to panic.
.LP
.SH OPTIONS
.IP "\fB\-c\fP \fB\--stdout\fP"
Decompress to standard output.
.IP "\fB\-d\fP \fB\--decompress\fP"
Force decompression. This flag is unnecessary on bunzip2 for GNO,
since it always decompresses.
.IP "\fB\-t\fP \fB\--test\fP"
Check integrity of the specified file(s), but don't decompress them.
This really performs a trial decompression and throws away the result.
.IP "\fB\-f\fP \fB\--force\fP"
Force overwrite of output files. Normally,
.I bunzip2
will not overwrite
existing output files.
.sp
.I bunzip2
normally declines to decompress files which don't have the
correct magic header bytes. If forced (-f), however, it will pass
such files through unmodified. This is how GNU gzip behaves.
.IP "\fB\-k\fP \fB\--keep\fP"
Keep (don't delete) input files during decompression.
.IP "\fB\-s\fP \fB\--small\fP"
Reduce memory usage, for decompression and testing. Files
are decompressed and tested using a modified algorithm which only
requires 2.5 bytes per block byte. This means any file can be
decompressed in 2300k of memory, albeit at about half the normal speed.
.sp
In short, if your machine is low on memory (5 megabytes or
less), you will probably need to use \-s. See MEMORY MANAGEMENT below.
.IP "\fB\-q\fP \fB\--quiet\fP"
Suppress non-essential warning messages. Messages pertaining to
I/O errors and other critical events will not be suppressed.
.IP "\fB\-v\fP \fB\--verbose\fP"
Verbose mode -- show the compression ratio for each file processed.
Further \-v's increase the verbosity level, spewing out lots of
information which is primarily of interest for diagnostic purposes.
.IP "\fB\-L\fP \fB\--license\fP \fB\-V\fP \fB\--version\fP"
Display the software version, license terms and conditions.
.IP "\fB\--\fP"
Treats all subsequent arguments as file names, even if they start
with a dash. This is so you can handle files with names beginning
with a dash, for example: bunzip2 \-- \-myfilename.
.LP
.SH MEMORY MANAGEMENT
.I bzip2
compresses large files in blocks. The block size affects
both the compression ratio achieved, and the amount of memory needed for
compression and decompression. The block size can be specified
to be 100,000 bytes through 900,000 bytes (the
default). At decompression time, the block size used for
compression is read from the header of the compressed file, and
.I bunzip2
then allocates itself just enough memory to decompress
the file.
.LP
Decompression requirements, in bytes, can be estimated as:
.LP
.nf
100k + ( 4 x block size ), or
100k + ( 2.5 x block size ) if using \-s
.fi
.LP
For files compressed with the default 900k block size,
.I bunzip2
will require about 3700 kbytes to decompress. To support decompression
of any file on a 4 megabyte machine,
.I bunzip2
has an option to
decompress using approximately half this amount of memory, about 2300
kbytes. Decompression speed is also halved, so you should use this
option only where necessary. The relevant flag is -s.
.LP
Decompression speeds are virtually unaffected by block size.
.LP
Another significant point applies to files which fit in a single block
-- that means most files you'd encounter using a large block size. The
amount of real memory touched is proportional to the size of the file,
since the file is smaller than a block. For example, compressing a file
20,000 bytes long with a 900k block size will cause the decompressor to
allocate 3700k but only touch 100k + 20000 * 4 = 180 kbytes
when decompressing it.
.LP
Here is a table which summarises the maximum memory usage for different
block sizes. Also recorded is the total compressed size for 14 files of
the Calgary Text Compression Corpus totalling 3,141,622 bytes. This
column gives some feel for how compression varies with block size.
These figures tend to understate the advantage of larger block sizes for
larger files, since the Corpus is dominated by smaller files.
.LP
.nf
Block Decompress Decompress Corpus
Size usage -s usage Size
.fi
.LP
.nf
100k 500k 350k 914704
200k 900k 600k 877703
300k 1300k 850k 860338
400k 1700k 1100k 846899
500k 2100k 1350k 845160
600k 2500k 1600k 838626
700k 2900k 1850k 834096
800k 3300k 2100k 828642
900k 3700k 2350k 828642
.fi
.LP
.SH RECOVERING DATA FROM DAMAGED FILES
.I bzip2
compresses files in blocks, usually 900kbytes long. Each
block is handled independently. If a media or transmission error causes
a multi-block .bz2
file to become damaged, it may be possible to
recover data from the undamaged blocks in the file.
.LP
The compressed representation of each block is delimited by a 48-bit
pattern, which makes it possible to find the block boundaries with
reasonable certainty. Each block also carries its own 32-bit CRC, so
damaged blocks can be distinguished from undamaged ones.
.LP
.I bzip2recover
is a simple program whose purpose is to search for blocks in .bz2 files,
and write each block out into its own .bz2 file. You can then use
.I bunzip2
\-t
to test the
integrity of the resulting files, and decompress those which are
undamaged.
.LP
.I bzip2recover
takes a single argument, the name of the damaged file,
and writes a number of files named "rec0001file.bz2",
"rec0002file.bz2", etc, containing the extracted blocks.
The output filenames are designed so that the use of
wildcards in subsequent processing -- for example,
"bunzip2 -c rec*file.bz2 > recovered_data" -- processes the files in
the correct order.
.LP
.I bzip2recover
should be of most use dealing with large .bz2
files, as these will contain many blocks. It is clearly
futile to use it on damaged single-block files, since a
damaged block cannot be recovered. If you wish to minimise
any potential data loss through media or transmission errors,
you might consider compressing with a smaller
block size.
.LP
.SH PERFORMANCE NOTES
.I bunzip2
usually allocates several megabytes of memory to operate
in, and then charges all over it in a fairly random fashion. This means
that performance is largely determined by the speed at which your machine can
access main memory or (if you have a caching accelerator) serve cache misses.
Because of this, small changes to the code to reduce the miss rate have
been observed to give disproportionately large performance improvements.
I imagine that
.I bunzip2
will perform best on machines with very large caches.
.LP
.SH CAVEATS
I/O error messages are not as helpful as they could be.
.I bunzip2
tries hard to detect I/O errors and exit cleanly, but the details of
what the problem is sometimes seem rather misleading.
.LP
.I bzip2recover
for GNO uses 32-bit integers to represent bit positions in compressed files,
so it cannot handle compressed files more than 512 megabytes long.
.LP
.SH AUTHOR
Julian Seward, jseward@acm.org.
.LP
http://sources.redhat.com/bzip2
.LP
The ideas embodied in
.I bzip2
are due to (at least) the following
people: Michael Burrows and David Wheeler (for the block sorting
transformation), David Wheeler (again, for the Huffman coder), Peter
Fenwick (for the structured coding model in the original
.I bzip,
and many refinements), and Alistair Moffat, Radford Neal and Ian Witten
(for the arithmetic coder in the original
.I bzip).
I am much
indebted for their help, support and advice. See the manual in the
source distribution for pointers to sources of documentation. Christian
von Roques encouraged me to look for faster sorting algorithms, so as to
speed up compression. Bela Lubkin encouraged me to improve the
worst-case compression performance. Many people sent patches, helped
with portability problems, lent machines, gave advice and were generally
helpful.
.LP
This version of
.I bunzip2
for GNO has been ported by Stephen Heumann <sheumann@myrealbox.com> from
Julian Seward's
.I bzip2
version 1.0.2 for other platforms.
.LP
This program contains material from the ORCA/C Run-Time Libraries,
copyright 1987-1996 by Byte Works, Inc. Used with permission.
.LP
It also incorporates a public domain stristr routine by Fred Cole,
Bob Stout, and Greg Thayer, which was obtained from http://www.snippets.org .

13
bunzip2.desc Normal file
View File

@ -0,0 +1,13 @@
Name: bunzip2
Version: 1.0.2
Shell: GNO/ME
Author: Stephen Heumann (GNO port of original code by Julian Seward)
Contact: sheumann@myrealbox.com
Where: /usr/local/bin
FTP: ftp.gno.org
Decompression program for files compressed in the bzip2 format. Based
on Julian Seward's bzip2 program, but only supports file decompression and
testing, not compression. Can also be used as bzcat, writing decompressed
data to stdout. Also includes bzip2recover program for restoring data
from partially corrupted bzip2 archives.

15
bunzip2.rez Normal file
View File

@ -0,0 +1,15 @@
#include "/lang/orca/libraries/rinclude/Types.Rez"
/*
 * Version resource (rVersion, ID 0x1) for the bunzip2 program.
 * Fields, in order: version number 1.0.2 at the "release" stage,
 * the US country code, the program name, and a one-line description.
 */
resource rVersion (0x1, purgeable3, nocrossbank) {
{ 1, 0, 2, /* version 1.0.2 */
release, /* development|alpha|beta|final|release */
0 /* non-final release number */
},
verUS, /* country code -- only some are avail */
"bunzip2", /* name */
/* _Very_ brief description. Check "file info" */
/* shown in the Finder to see if it's too long */
/* Note that \n is used to separate lines here. */
"Bzip2 archive decompression program\n"
};

1
bzcat.1 Normal file
View File

@ -0,0 +1 @@
.so man1/bunzip2.1

76
bzdiff
View File

@ -1,76 +0,0 @@
#!/bin/sh
# sh is buggy on RS/6000 AIX 3.2. Replace above line with #!/bin/ksh

# Bzcmp/diff wrapped for bzip2,
# adapted from zdiff by Philippe Troin <phil@fifi.org> for Debian GNU/Linux.

# Bzcmp and bzdiff are used to invoke the cmp or the diff program on
# compressed files.  All options specified are passed directly to cmp
# or diff.  If only 1 file is specified, then the files compared are
# file1 and an uncompressed file1.bz2.  If two files are specified,
# then they are uncompressed (if necessary) and fed to cmp or diff.
# The exit status from cmp or diff is preserved.

PATH="/usr/bin:$PATH"; export PATH

# The comparison program is chosen from the name this script was invoked
# under: anything ending in "cmp" runs cmp, everything else runs diff.
# $CMP / $DIFF override the program that is actually executed.
prog=`echo $0 | sed 's|.*/||'`
case "$prog" in
    *cmp) comp=${CMP-cmp}   ;;
    *)    comp=${DIFF-diff} ;;
esac

# Separate the arguments into options (leading "-") and file names.
# Every file name must refer to an existing regular file.
OPTIONS=
FILES=
for ARG
do
    case "$ARG" in
    -*) OPTIONS="$OPTIONS $ARG";;
    *)  if test -f "$ARG"; then
            FILES="$FILES $ARG"
        else
            echo "${prog}: $ARG not found or not a regular file"
            exit 1
        fi ;;
    esac
done
if test -z "$FILES"; then
    echo "Usage: $prog [${comp}_options] file [file]"
    exit 1
fi

# NOTE: $FILES is re-split on whitespace here, so file names containing
# blanks are not supported.
set $FILES
if test $# -eq 1; then
    # One file: compare the decompressed FILE.bz2 against plain FILE.
    # The dot is escaped so that only a literal ".bz2" suffix is removed.
    FILE=`echo "$1" | sed 's/\.bz2$//'`
    bzip2 -cd "$FILE.bz2" | $comp $OPTIONS - "$FILE"
    STAT="$?"

elif test $# -eq 2; then
    case "$1" in
    *.bz2)
        case "$2" in
        *.bz2)
            # Both files are compressed.  Only one of them can be fed
            # through stdin, so the second is decompressed to a temporary
            # file.  The temporary file is created only in this branch so
            # that the other branches neither need nor leak one.
            tmp=`tempfile -d /tmp -p bz` || {
                echo 'cannot create a temporary file' >&2
                exit 1
            }
            bzip2 -cdfq "$2" > "$tmp"
            bzip2 -cdfq "$1" | $comp $OPTIONS - "$tmp"
            STAT="$?"
            /bin/rm -f "$tmp";;

        *)  bzip2 -cdfq "$1" | $comp $OPTIONS - "$2"
            STAT="$?";;
        esac;;
    *)  case "$2" in
        *.bz2)
            bzip2 -cdfq "$2" | $comp $OPTIONS "$1" -
            STAT="$?";;
        *)  $comp $OPTIONS "$1" "$2"
            STAT="$?";;
        esac;;
    esac

else
    echo "Usage: $prog [${comp}_options] file [file]"
    exit 1
fi

# Propagate the status of cmp/diff for the one-file and two-file cases alike.
exit "$STAT"

View File

@ -1,47 +0,0 @@
\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org>
\"for Debian GNU/Linux
.TH BZDIFF 1
.SH NAME
bzcmp, bzdiff \- compare bzip2 compressed files
.SH SYNOPSIS
.B bzcmp
[ cmp_options ] file1
[ file2 ]
.br
.B bzdiff
[ diff_options ] file1
[ file2 ]
.SH DESCRIPTION
.I Bzcmp
and
.I bzdiff
are used to invoke the
.I cmp
or the
.I diff
program on bzip2 compressed files. All options specified are passed
directly to
.I cmp
or
.IR diff "."
If only 1 file is specified, then the files compared are
.I file1
and an uncompressed
.IR file1 ".bz2."
If two files are specified, then they are uncompressed if necessary and fed to
.I cmp
or
.IR diff "."
The exit status from
.I cmp
or
.I diff
is preserved.
.SH "SEE ALSO"
cmp(1), diff(1), bzmore(1), bzless(1), bzgrep(1), bzip2(1)
.SH BUGS
Messages from the
.I cmp
or
.I diff
programs refer to temporary filenames instead of those specified.

71
bzgrep
View File

@ -1,71 +0,0 @@
#!/bin/sh

# Bzgrep wrapped for bzip2,
# adapted from zgrep by Philippe Troin <phil@fifi.org> for Debian GNU/Linux.
## zgrep notice:
## zgrep -- a wrapper around a grep program that decompresses files as needed
## Adapted from a version sent by Charles Levert <charles@comm.polymtl.ca>

PATH="/usr/bin:$PATH"; export PATH

# The grep flavour is chosen from the name this script was invoked under;
# $EGREP / $FGREP / $GREP override the program that is actually executed.
prog=`echo $0 | sed 's|.*/||'`
case "$prog" in
    *egrep) grep=${EGREP-egrep} ;;
    *fgrep) grep=${FGREP-fgrep} ;;
    *)      grep=${GREP-grep}   ;;
esac

# Collect grep options into $opt and the pattern into $pat.  Both start
# empty so that same-named variables inherited from the environment cannot
# leak into the grep command line.
opt=""
pat=""
while test $# -ne 0; do
    case "$1" in
    -e | -f) opt="$opt $1"; shift; pat="$1"
             if test "$grep" = grep; then  # grep is buggy with -e on SVR4
                 grep=egrep
             fi;;
    -A | -B) opt="$opt $1 $2"; shift;;   # these options take an argument
    -*)      opt="$opt $1";;
    *)       if test -z "$pat"; then
                 pat="$1"
             else
                 # First non-option after the pattern: the rest are files.
                 break;
             fi;;
    esac
    shift
done

if test -z "$pat"; then
    echo "grep through bzip2 files"
    echo "usage: $prog [grep_options] pattern [files]"
    exit 1
fi

# Look for the letters "l" and "h" anywhere in the collected options
# (blanks and dashes removed) to decide how results are reported below.
list=0
silent=0
op=`echo "$opt" | sed -e 's/ //g' -e 's/-//g'`
case "$op" in
    *l*) list=1
esac
case "$op" in
    *h*) silent=1
esac

# No file arguments: filter standard input.
if test $# -eq 0; then
    bzip2 -cdfq | $grep $opt "$pat"
    exit $?
fi

res=0
for i do
    # Fall back to "$i.bz2" when "$i" itself is not a regular file.
    if test -f "$i"; then :; else if test -f "$i.bz2"; then i="$i.bz2"; fi; fi
    if test $list -eq 1; then
        # List mode: discard grep's stdout and print the file name ourselves
        # when grep succeeds.  The name is quoted so it is printed verbatim.
        bzip2 -cdfq "$i" | $grep $opt "$pat" 2>&1 > /dev/null && echo "$i"
        r=$?
    elif test $# -eq 1 -o $silent -eq 1; then
        bzip2 -cdfq "$i" | $grep $opt "$pat"
        r=$?
    else
        # Several files: prefix every matching line with the file name.
        # NOTE: $? here is the status of sed (the last pipeline stage),
        # not of grep.
        bzip2 -cdfq "$i" | $grep $opt "$pat" | sed "s|^|${i}:|"
        r=$?
    fi
    # Remember the most recent non-zero status for the final exit code.
    test "$r" -ne 0 && res="$r"
done
exit $res

View File

@ -1,56 +0,0 @@
\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org>
\"for Debian GNU/Linux
.TH BZGREP 1
.SH NAME
bzgrep, bzfgrep, bzegrep \- search possibly bzip2 compressed files for a regular expression
.SH SYNOPSIS
.B bzgrep
[ grep_options ]
.BI [\ -e\ ] " pattern"
.IR filename ".\|.\|."
.br
.B bzegrep
[ egrep_options ]
.BI [\ -e\ ] " pattern"
.IR filename ".\|.\|."
.br
.B bzfgrep
[ fgrep_options ]
.BI [\ -e\ ] " pattern"
.IR filename ".\|.\|."
.SH DESCRIPTION
.IR Bzgrep
is used to invoke the
.I grep
program on bzip2-compressed files. All options specified are passed directly to
.I grep.
If no file is specified, then the standard input is decompressed
if necessary and fed to grep.
Otherwise the given files are uncompressed if necessary and fed to
.I grep.
.PP
If
.I bzgrep
is invoked as
.I bzegrep
or
.I bzfgrep
then
.I egrep
or
.I fgrep
is used instead of
.I grep.
If the GREP environment variable is set,
.I bzgrep
uses it as the
.I grep
program to be invoked. For example:
for sh: GREP=fgrep bzgrep string files
for csh: (setenv GREP fgrep; bzgrep string files)
.SH AUTHOR
Charles Levert (charles@comm.polymtl.ca). Adapted to bzip2 by Philippe
Troin <phil@fifi.org> for Debian GNU/Linux.
.SH "SEE ALSO"
grep(1), egrep(1), fgrep(1), bzdiff(1), bzmore(1), bzless(1), bzip2(1)

453
bzip2.1
View File

@ -1,453 +0,0 @@
.PU
.TH bzip2 1
.SH NAME
bzip2, bunzip2 \- a block-sorting file compressor, v1.0.2
.br
bzcat \- decompresses files to stdout
.br
bzip2recover \- recovers data from damaged bzip2 files
.SH SYNOPSIS
.ll +8
.B bzip2
.RB [ " \-cdfkqstvzVL123456789 " ]
[
.I "filenames \&..."
]
.ll -8
.br
.B bunzip2
.RB [ " \-fkvsVL " ]
[
.I "filenames \&..."
]
.br
.B bzcat
.RB [ " \-s " ]
[
.I "filenames \&..."
]
.br
.B bzip2recover
.I "filename"
.SH DESCRIPTION
.I bzip2
compresses files using the Burrows-Wheeler block sorting
text compression algorithm, and Huffman coding. Compression is
generally considerably better than that achieved by more conventional
LZ77/LZ78-based compressors, and approaches the performance of the PPM
family of statistical compressors.
The command-line options are deliberately very similar to
those of
.I GNU gzip,
but they are not identical.
.I bzip2
expects a list of file names to accompany the
command-line flags. Each file is replaced by a compressed version of
itself, with the name "original_name.bz2".
Each compressed file
has the same modification date, permissions, and, when possible,
ownership as the corresponding original, so that these properties can
be correctly restored at decompression time. File name handling is
naive in the sense that there is no mechanism for preserving original
file names, permissions, ownerships or dates in filesystems which lack
these concepts, or have serious file name length restrictions, such as
MS-DOS.
.I bzip2
and
.I bunzip2
will by default not overwrite existing
files. If you want this to happen, specify the \-f flag.
If no file names are specified,
.I bzip2
compresses from standard
input to standard output. In this case,
.I bzip2
will decline to
write compressed output to a terminal, as this would be entirely
incomprehensible and therefore pointless.
.I bunzip2
(or
.I bzip2 \-d)
decompresses all
specified files. Files which were not created by
.I bzip2
will be detected and ignored, and a warning issued.
.I bzip2
attempts to guess the filename for the decompressed file
from that of the compressed file as follows:
filename.bz2 becomes filename
filename.bz becomes filename
filename.tbz2 becomes filename.tar
filename.tbz becomes filename.tar
anyothername becomes anyothername.out
If the file does not end in one of the recognised endings,
.I .bz2,
.I .bz,
.I .tbz2
or
.I .tbz,
.I bzip2
complains that it cannot
guess the name of the original file, and uses the original name
with
.I .out
appended.
As with compression, supplying no
filenames causes decompression from
standard input to standard output.
.I bunzip2
will correctly decompress a file which is the
concatenation of two or more compressed files. The result is the
concatenation of the corresponding uncompressed files. Integrity
testing (\-t)
of concatenated
compressed files is also supported.
You can also compress or decompress files to the standard output by
giving the \-c flag. Multiple files may be compressed and
decompressed like this. The resulting outputs are fed sequentially to
stdout. Compression of multiple files
in this manner generates a stream
containing multiple compressed file representations. Such a stream
can be decompressed correctly only by
.I bzip2
version 0.9.0 or
later. Earlier versions of
.I bzip2
will stop after decompressing
the first file in the stream.
.I bzcat
(or
.I bzip2 -dc)
decompresses all specified files to
the standard output.
.I bzip2
will read arguments from the environment variables
.I BZIP2
and
.I BZIP,
in that order, and will process them
before any arguments read from the command line. This gives a
convenient way to supply default arguments.
Compression is always performed, even if the compressed
file is slightly
larger than the original. Files of less than about one hundred bytes
tend to get larger, since the compression mechanism has a constant
overhead in the region of 50 bytes. Random data (including the output
of most file compressors) is coded at about 8.05 bits per byte, giving
an expansion of around 0.5%.
As a self-check for your protection,
.I
bzip2
uses 32-bit CRCs to
make sure that the decompressed version of a file is identical to the
original. This guards against corruption of the compressed data, and
against undetected bugs in
.I bzip2
(hopefully very unlikely). The
chance of data corruption going undetected is microscopic, about one
chance in four billion for each file processed. Be aware, though, that
the check occurs upon decompression, so it can only tell you that
something is wrong. It can't help you
recover the original uncompressed
data. You can use
.I bzip2recover
to try to recover data from
damaged files.
Return values: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt
compressed file, 3 for an internal consistency error (eg, bug) which
caused
.I bzip2
to panic.
.SH OPTIONS
.TP
.B \-c --stdout
Compress or decompress to standard output.
.TP
.B \-d --decompress
Force decompression.
.I bzip2,
.I bunzip2
and
.I bzcat
are
really the same program, and the decision about what actions to take is
done on the basis of which name is used. This flag overrides that
mechanism, and forces
.I bzip2
to decompress.
.TP
.B \-z --compress
The complement to \-d: forces compression, regardless of the
invocation name.
.TP
.B \-t --test
Check integrity of the specified file(s), but don't decompress them.
This really performs a trial decompression and throws away the result.
.TP
.B \-f --force
Force overwrite of output files. Normally,
.I bzip2
will not overwrite
existing output files. Also forces
.I bzip2
to break hard links
to files, which it otherwise wouldn't do.
bzip2 normally declines to decompress files which don't have the
correct magic header bytes. If forced (-f), however, it will pass
such files through unmodified. This is how GNU gzip behaves.
.TP
.B \-k --keep
Keep (don't delete) input files during compression
or decompression.
.TP
.B \-s --small
Reduce memory usage, for compression, decompression and testing. Files
are decompressed and tested using a modified algorithm which only
requires 2.5 bytes per block byte. This means any file can be
decompressed in 2300k of memory, albeit at about half the normal speed.
During compression, \-s selects a block size of 200k, which limits
memory use to around the same figure, at the expense of your compression
ratio. In short, if your machine is low on memory (8 megabytes or
less), use \-s for everything. See MEMORY MANAGEMENT below.
.TP
.B \-q --quiet
Suppress non-essential warning messages. Messages pertaining to
I/O errors and other critical events will not be suppressed.
.TP
.B \-v --verbose
Verbose mode -- show the compression ratio for each file processed.
Further \-v's increase the verbosity level, spewing out lots of
information which is primarily of interest for diagnostic purposes.
.TP
.B \-L --license -V --version
Display the software version, license terms and conditions.
.TP
.B \-1 (or \-\-fast) to \-9 (or \-\-best)
Set the block size to 100 k, 200 k .. 900 k when compressing. Has no
effect when decompressing. See MEMORY MANAGEMENT below.
The \-\-fast and \-\-best aliases are primarily for GNU gzip
compatibility. In particular, \-\-fast doesn't make things
significantly faster.
And \-\-best merely selects the default behaviour.
.TP
.B \--
Treats all subsequent arguments as file names, even if they start
with a dash. This is so you can handle files with names beginning
with a dash, for example: bzip2 \-- \-myfilename.
.TP
.B \--repetitive-fast --repetitive-best
These flags are redundant in versions 0.9.5 and above. They provided
some coarse control over the behaviour of the sorting algorithm in
earlier versions, which was sometimes useful. 0.9.5 and above have an
improved algorithm which renders these flags irrelevant.
.SH MEMORY MANAGEMENT
.I bzip2
compresses large files in blocks. The block size affects
both the compression ratio achieved, and the amount of memory needed for
compression and decompression. The flags \-1 through \-9
specify the block size to be 100,000 bytes through 900,000 bytes (the
default) respectively. At decompression time, the block size used for
compression is read from the header of the compressed file, and
.I bunzip2
then allocates itself just enough memory to decompress
the file. Since block sizes are stored in compressed files, it follows
that the flags \-1 to \-9 are irrelevant to and so ignored
during decompression.
Compression and decompression requirements,
in bytes, can be estimated as:
Compression: 400k + ( 8 x block size )
Decompression: 100k + ( 4 x block size ), or
100k + ( 2.5 x block size )
Larger block sizes give rapidly diminishing marginal returns. Most of
the compression comes from the first two or three hundred k of block
size, a fact worth bearing in mind when using
.I bzip2
on small machines.
It is also important to appreciate that the decompression memory
requirement is set at compression time by the choice of block size.
For files compressed with the default 900k block size,
.I bunzip2
will require about 3700 kbytes to decompress. To support decompression
of any file on a 4 megabyte machine,
.I bunzip2
has an option to
decompress using approximately half this amount of memory, about 2300
kbytes. Decompression speed is also halved, so you should use this
option only where necessary. The relevant flag is -s.
In general, try and use the largest block size memory constraints allow,
since that maximises the compression achieved. Compression and
decompression speed are virtually unaffected by block size.
Another significant point applies to files which fit in a single block
-- that means most files you'd encounter using a large block size. The
amount of real memory touched is proportional to the size of the file,
since the file is smaller than a block. For example, compressing a file
20,000 bytes long with the flag -9 will cause the compressor to
allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560
kbytes of it. Similarly, the decompressor will allocate 3700k but only
touch 100k + 20000 * 4 = 180 kbytes.
Here is a table which summarises the maximum memory usage for different
block sizes. Also recorded is the total compressed size for 14 files of
the Calgary Text Compression Corpus totalling 3,141,622 bytes. This
column gives some feel for how compression varies with block size.
These figures tend to understate the advantage of larger block sizes for
larger files, since the Corpus is dominated by smaller files.
Compress Decompress Decompress Corpus
Flag usage usage -s usage Size
-1 1200k 500k 350k 914704
-2 2000k 900k 600k 877703
-3 2800k 1300k 850k 860338
-4 3600k 1700k 1100k 846899
-5 4400k 2100k 1350k 845160
-6 5200k 2500k 1600k 838626
-7 6100k 2900k 1850k 834096
-8 6800k 3300k 2100k 828642
-9 7600k 3700k 2350k 828642
.SH RECOVERING DATA FROM DAMAGED FILES
.I bzip2
compresses files in blocks, usually 900kbytes long. Each
block is handled independently. If a media or transmission error causes
a multi-block .bz2
file to become damaged, it may be possible to
recover data from the undamaged blocks in the file.
The compressed representation of each block is delimited by a 48-bit
pattern, which makes it possible to find the block boundaries with
reasonable certainty. Each block also carries its own 32-bit CRC, so
damaged blocks can be distinguished from undamaged ones.
.I bzip2recover
is a simple program whose purpose is to search for
blocks in .bz2 files, and write each block out into its own .bz2
file. You can then use
.I bzip2
\-t
to test the
integrity of the resulting files, and decompress those which are
undamaged.
.I bzip2recover
takes a single argument, the name of the damaged file,
and writes a number of files "rec00001file.bz2",
"rec00002file.bz2", etc, containing the extracted blocks.
The output filenames are designed so that the use of
wildcards in subsequent processing -- for example,
"bzip2 -dc rec*file.bz2 > recovered_data" -- processes the files in
the correct order.
.I bzip2recover
should be of most use dealing with large .bz2
files, as these will contain many blocks. It is clearly
futile to use it on damaged single-block files, since a
damaged block cannot be recovered. If you wish to minimise
any potential data loss through media or transmission errors,
you might consider compressing with a smaller
block size.
.SH PERFORMANCE NOTES
The sorting phase of compression gathers together similar strings in the
file. Because of this, files containing very long runs of repeated
symbols, like "aabaabaabaab ..." (repeated several hundred times) may
compress more slowly than normal. Versions 0.9.5 and above fare much
better than previous versions in this respect. The ratio between
worst-case and average-case compression time is in the region of 10:1.
For previous versions, this figure was more like 100:1. You can use the
\-vvvv option to monitor progress in great detail, if you want.
Decompression speed is unaffected by these phenomena.
.I bzip2
usually allocates several megabytes of memory to operate
in, and then charges all over it in a fairly random fashion. This means
that performance, both for compressing and decompressing, is largely
determined by the speed at which your machine can service cache misses.
Because of this, small changes to the code to reduce the miss rate have
been observed to give disproportionately large performance improvements.
I imagine
.I bzip2
will perform best on machines with very large caches.
.SH CAVEATS
I/O error messages are not as helpful as they could be.
.I bzip2
tries hard to detect I/O errors and exit cleanly, but the details of
what the problem is sometimes seem rather misleading.
This manual page pertains to version 1.0.2 of
.I bzip2.
Compressed data created by this version is entirely forwards and
backwards compatible with the previous public releases, versions
0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, but with the following
exception: 0.9.0 and above can correctly decompress multiple
concatenated compressed files. 0.1pl2 cannot do this; it will stop
after decompressing just the first file in the stream.
.I bzip2recover
versions prior to this one, 1.0.2, used 32-bit integers to represent
bit positions in compressed files, so they could not handle compressed
files more than 512 megabytes long. Version 1.0.2 and above uses
64-bit ints on some platforms which support them (GNU supported
targets, and Windows). To establish whether or not bzip2recover was
built with such a limitation, run it without arguments. In any event
you can build yourself an unlimited version if you can recompile it
with MaybeUInt64 set to be an unsigned 64-bit integer.
.SH AUTHOR
Julian Seward, jseward@acm.org.
http://sources.redhat.com/bzip2
The ideas embodied in
.I bzip2
are due to (at least) the following
people: Michael Burrows and David Wheeler (for the block sorting
transformation), David Wheeler (again, for the Huffman coder), Peter
Fenwick (for the structured coding model in the original
.I bzip,
and many refinements), and Alistair Moffat, Radford Neal and Ian Witten
(for the arithmetic coder in the original
.I bzip).
I am much
indebted for their help, support and advice. See the manual in the
source distribution for pointers to sources of documentation. Christian
von Roques encouraged me to look for faster sorting algorithms, so as to
speed up compression. Bela Lubkin encouraged me to improve the
worst-case compression performance.
The bz* scripts are derived from those of GNU gzip.
Many people sent patches, helped
with portability problems, lent machines, gave advice and were generally
helpful.

View File

@ -1,398 +0,0 @@
bzip2(1) bzip2(1)
NNAAMMEE
bzip2, bunzip2 - a block-sorting file compressor, v1.0.2
bzcat - decompresses files to stdout
bzip2recover - recovers data from damaged bzip2 files
SSYYNNOOPPSSIISS
bbzziipp22 [ --ccddffkkqqssttvvzzVVLL112233445566778899 ] [ _f_i_l_e_n_a_m_e_s _._._. ]
bbuunnzziipp22 [ --ffkkvvssVVLL ] [ _f_i_l_e_n_a_m_e_s _._._. ]
bbzzccaatt [ --ss ] [ _f_i_l_e_n_a_m_e_s _._._. ]
bbzziipp22rreeccoovveerr _f_i_l_e_n_a_m_e
DDEESSCCRRIIPPTTIIOONN
_b_z_i_p_2 compresses files using the Burrows-Wheeler block
sorting text compression algorithm, and Huffman coding.
Compression is generally considerably better than that
achieved by more conventional LZ77/LZ78-based compressors,
and approaches the performance of the PPM family of sta­
tistical compressors.
The command-line options are deliberately very similar to
those of _G_N_U _g_z_i_p_, but they are not identical.
_b_z_i_p_2 expects a list of file names to accompany the com­
mand-line flags. Each file is replaced by a compressed
version of itself, with the name "original_name.bz2".
Each compressed file has the same modification date, per­
missions, and, when possible, ownership as the correspond­
ing original, so that these properties can be correctly
restored at decompression time. File name handling is
naive in the sense that there is no mechanism for preserv­
ing original file names, permissions, ownerships or dates
in filesystems which lack these concepts, or have serious
file name length restrictions, such as MS-DOS.
_b_z_i_p_2 and _b_u_n_z_i_p_2 will by default not overwrite existing
files. If you want this to happen, specify the -f flag.
If no file names are specified, _b_z_i_p_2 compresses from
standard input to standard output. In this case, _b_z_i_p_2
will decline to write compressed output to a terminal, as
this would be entirely incomprehensible and therefore
pointless.
_b_u_n_z_i_p_2 (or _b_z_i_p_2 _-_d_) decompresses all specified files.
Files which were not created by _b_z_i_p_2 will be detected and
ignored, and a warning issued. _b_z_i_p_2 attempts to guess
the filename for the decompressed file from that of the
compressed file as follows:
filename.bz2 becomes filename
filename.bz becomes filename
filename.tbz2 becomes filename.tar
filename.tbz becomes filename.tar
anyothername becomes anyothername.out
If the file does not end in one of the recognised endings,
_._b_z_2_, _._b_z_, _._t_b_z_2 or _._t_b_z_, _b_z_i_p_2 complains that it cannot
guess the name of the original file, and uses the original
name with _._o_u_t appended.
As with compression, supplying no filenames causes decom­
pression from standard input to standard output.
_b_u_n_z_i_p_2 will correctly decompress a file which is the con­
catenation of two or more compressed files. The result is
the concatenation of the corresponding uncompressed files.
Integrity testing (-t) of concatenated compressed files is
also supported.
You can also compress or decompress files to the standard
output by giving the -c flag. Multiple files may be com­
pressed and decompressed like this. The resulting outputs
are fed sequentially to stdout. Compression of multiple
files in this manner generates a stream containing multi­
ple compressed file representations. Such a stream can be
decompressed correctly only by _b_z_i_p_2 version 0.9.0 or
later. Earlier versions of _b_z_i_p_2 will stop after decom­
pressing the first file in the stream.
_b_z_c_a_t (or _b_z_i_p_2 _-_d_c_) decompresses all specified files to
the standard output.
_b_z_i_p_2 will read arguments from the environment variables
_B_Z_I_P_2 and _B_Z_I_P_, in that order, and will process them
before any arguments read from the command line. This
gives a convenient way to supply default arguments.
Compression is always performed, even if the compressed
file is slightly larger than the original. Files of less
than about one hundred bytes tend to get larger, since the
compression mechanism has a constant overhead in the
region of 50 bytes. Random data (including the output of
most file compressors) is coded at about 8.05 bits per
byte, giving an expansion of around 0.5%.
As a self-check for your protection, _b_z_i_p_2 uses 32-bit
CRCs to make sure that the decompressed version of a file
is identical to the original. This guards against corrup­
tion of the compressed data, and against undetected bugs
in _b_z_i_p_2 (hopefully very unlikely). The chances of data
corruption going undetected is microscopic, about one
chance in four billion for each file processed. Be aware,
though, that the check occurs upon decompression, so it
can only tell you that something is wrong. It can't help
you recover the original uncompressed data. You can use
_b_z_i_p_2_r_e_c_o_v_e_r to try to recover data from damaged files.
Return values: 0 for a normal exit, 1 for environmental
problems (file not found, invalid flags, I/O errors, &c),
2 to indicate a corrupt compressed file, 3 for an internal
consistency error (eg, bug) which caused _b_z_i_p_2 to panic.
OOPPTTIIOONNSS
--cc ----ssttddoouutt
Compress or decompress to standard output.
--dd ----ddeeccoommpprreessss
Force decompression. _b_z_i_p_2_, _b_u_n_z_i_p_2 and _b_z_c_a_t are
really the same program, and the decision about
what actions to take is done on the basis of which
name is used. This flag overrides that mechanism,
and forces _b_z_i_p_2 to decompress.
--zz ----ccoommpprreessss
The complement to -d: forces compression,
regardless of the invocation name.
--tt ----tteesstt
Check integrity of the specified file(s), but don't
decompress them. This really performs a trial
decompression and throws away the result.
--ff ----ffoorrccee
Force overwrite of output files. Normally, _b_z_i_p_2
will not overwrite existing output files. Also
forces _b_z_i_p_2 to break hard links to files, which it
otherwise wouldn't do.
bzip2 normally declines to decompress files which
don't have the correct magic header bytes. If
forced (-f), however, it will pass such files
through unmodified. This is how GNU gzip behaves.
--kk ----kkeeeepp
Keep (don't delete) input files during compression
or decompression.
--ss ----ssmmaallll
Reduce memory usage, for compression, decompression
and testing. Files are decompressed and tested
using a modified algorithm which only requires 2.5
bytes per block byte. This means any file can be
decompressed in 2300k of memory, albeit at about
half the normal speed.
During compression, -s selects a block size of
200k, which limits memory use to around the same
figure, at the expense of your compression ratio.
In short, if your machine is low on memory (8
megabytes or less), use -s for everything. See
MEMORY MANAGEMENT below.
--qq ----qquuiieett
Suppress non-essential warning messages. Messages
pertaining to I/O errors and other critical events
will not be suppressed.
--vv ----vveerrbboossee
Verbose mode -- show the compression ratio for each
file processed. Further -v's increase the ver­
bosity level, spewing out lots of information which
is primarily of interest for diagnostic purposes.
--LL ----lliicceennssee --VV ----vveerrssiioonn
Display the software version, license terms and
conditions.
--11 ((oorr ----ffaasstt)) ttoo --99 ((oorr ----bbeesstt))
Set the block size to 100 k, 200 k .. 900 k when
compressing. Has no effect when decompressing.
See MEMORY MANAGEMENT below. The --fast and --best
aliases are primarily for GNU gzip compatibility.
In particular, --fast doesn't make things signifi­
cantly faster. And --best merely selects the
default behaviour.
---- Treats all subsequent arguments as file names, even
if they start with a dash. This is so you can han­
dle files with names beginning with a dash, for
example: bzip2 -- -myfilename.
----rreeppeettiittiivvee--ffaasstt ----rreeppeettiittiivvee--bbeesstt
These flags are redundant in versions 0.9.5 and
above. They provided some coarse control over the
behaviour of the sorting algorithm in earlier ver­
sions, which was sometimes useful. 0.9.5 and above
have an improved algorithm which renders these
flags irrelevant.
MMEEMMOORRYY MMAANNAAGGEEMMEENNTT
_b_z_i_p_2 compresses large files in blocks. The block size
affects both the compression ratio achieved, and the
amount of memory needed for compression and decompression.
The flags -1 through -9 specify the block size to be
100,000 bytes through 900,000 bytes (the default) respec­
tively. At decompression time, the block size used for
compression is read from the header of the compressed
file, and _b_u_n_z_i_p_2 then allocates itself just enough memory
to decompress the file. Since block sizes are stored in
compressed files, it follows that the flags -1 to -9 are
irrelevant to and so ignored during decompression.
Compression and decompression requirements, in bytes, can
be estimated as:
Compression: 400k + ( 8 x block size )
Decompression: 100k + ( 4 x block size ), or
100k + ( 2.5 x block size )
Larger block sizes give rapidly diminishing marginal
returns. Most of the compression comes from the first two
or three hundred k of block size, a fact worth bearing in
mind when using _b_z_i_p_2 on small machines. It is also
important to appreciate that the decompression memory
requirement is set at compression time by the choice of
block size.
For files compressed with the default 900k block size,
_b_u_n_z_i_p_2 will require about 3700 kbytes to decompress. To
support decompression of any file on a 4 megabyte machine,
_b_u_n_z_i_p_2 has an option to decompress using approximately
half this amount of memory, about 2300 kbytes. Decompres­
sion speed is also halved, so you should use this option
only where necessary. The relevant flag is -s.
In general, try and use the largest block size memory con­
straints allow, since that maximises the compression
achieved. Compression and decompression speed are virtu­
ally unaffected by block size.
Another significant point applies to files which fit in a
single block -- that means most files you'd encounter
using a large block size. The amount of real memory
touched is proportional to the size of the file, since the
file is smaller than a block. For example, compressing a
file 20,000 bytes long with the flag -9 will cause the
compressor to allocate around 7600k of memory, but only
touch 400k + 20000 * 8 = 560 kbytes of it. Similarly, the
decompressor will allocate 3700k but only touch 100k +
20000 * 4 = 180 kbytes.
Here is a table which summarises the maximum memory usage
for different block sizes. Also recorded is the total
compressed size for 14 files of the Calgary Text Compres­
sion Corpus totalling 3,141,622 bytes. This column gives
some feel for how compression varies with block size.
These figures tend to understate the advantage of larger
block sizes for larger files, since the Corpus is domi­
nated by smaller files.
Compress Decompress Decompress Corpus
Flag usage usage -s usage Size
-1 1200k 500k 350k 914704
-2 2000k 900k 600k 877703
-3 2800k 1300k 850k 860338
-4 3600k 1700k 1100k 846899
-5 4400k 2100k 1350k 845160
-6 5200k 2500k 1600k 838626
-7 6000k 2900k 1850k 834096
-8 6800k 3300k 2100k 828642
-9 7600k 3700k 2350k 828642
RECOVERING DATA FROM DAMAGED FILES
_b_z_i_p_2 compresses files in blocks, usually 900kbytes long.
Each block is handled independently. If a media or trans­
mission error causes a multi-block .bz2 file to become
damaged, it may be possible to recover data from the
undamaged blocks in the file.
The compressed representation of each block is delimited
by a 48-bit pattern, which makes it possible to find the
block boundaries with reasonable certainty. Each block
also carries its own 32-bit CRC, so damaged blocks can be
distinguished from undamaged ones.
_b_z_i_p_2_r_e_c_o_v_e_r is a simple program whose purpose is to
search for blocks in .bz2 files, and write each block out
into its own .bz2 file. You can then use _b_z_i_p_2 -t to test
the integrity of the resulting files, and decompress those
which are undamaged.
_b_z_i_p_2_r_e_c_o_v_e_r takes a single argument, the name of the dam­
aged file, and writes a number of files
"rec00001file.bz2", "rec00002file.bz2", etc, containing
the extracted blocks. The output filenames are
designed so that the use of wildcards in subsequent pro­
cessing -- for example, "bzip2 -dc rec*file.bz2 > recov­
ered_data" -- processes the files in the correct order.
_b_z_i_p_2_r_e_c_o_v_e_r should be of most use dealing with large .bz2
files, as these will contain many blocks. It is clearly
futile to use it on damaged single-block files, since a
damaged block cannot be recovered. If you wish to min­
imise any potential data loss through media or transmis­
sion errors, you might consider compressing with a smaller
block size.
PERFORMANCE NOTES
The sorting phase of compression gathers together similar
strings in the file. Because of this, files containing
very long runs of repeated symbols, like "aabaabaabaab
..." (repeated several hundred times) may compress more
slowly than normal. Versions 0.9.5 and above fare much
better than previous versions in this respect. The ratio
between worst-case and average-case compression time is in
the region of 10:1. For previous versions, this figure
was more like 100:1. You can use the -vvvv option to mon­
itor progress in great detail, if you want.
Decompression speed is unaffected by these phenomena.
_b_z_i_p_2 usually allocates several megabytes of memory to
operate in, and then charges all over it in a fairly ran­
dom fashion. This means that performance, both for com­
pressing and decompressing, is largely determined by the
speed at which your machine can service cache misses.
Because of this, small changes to the code to reduce the
miss rate have been observed to give disproportionately
large performance improvements. I imagine _b_z_i_p_2 will per­
form best on machines with very large caches.
CAVEATS
I/O error messages are not as helpful as they could be.
_b_z_i_p_2 tries hard to detect I/O errors and exit cleanly,
but the details of what the problem is sometimes seem
rather misleading.
This manual page pertains to version 1.0.2 of _b_z_i_p_2_. Com­
pressed data created by this version is entirely forwards
and backwards compatible with the previous public
releases, versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1,
but with the following exception: 0.9.0 and above can cor­
rectly decompress multiple concatenated compressed files.
0.1pl2 cannot do this; it will stop after decompressing
just the first file in the stream.
_b_z_i_p_2_r_e_c_o_v_e_r versions prior to this one, 1.0.2, used
32-bit integers to represent bit positions in compressed
files, so they could not handle compressed files more than
512 megabytes long. Versions 1.0.2 and above use 64-bit
ints on some platforms which support them (GNU supported
targets, and Windows). To establish whether or not
bzip2recover was built with such a limitation, run it
without arguments. In any event you can build yourself an
unlimited version if you can recompile it with MaybeUInt64
set to be an unsigned 64-bit integer.
AUTHOR
Julian Seward, jseward@acm.org.
http://sources.redhat.com/bzip2
The ideas embodied in _b_z_i_p_2 are due to (at least) the fol­
lowing people: Michael Burrows and David Wheeler (for the
block sorting transformation), David Wheeler (again, for
the Huffman coder), Peter Fenwick (for the structured cod­
ing model in the original _b_z_i_p_, and many refinements), and
Alistair Moffat, Radford Neal and Ian Witten (for the
arithmetic coder in the original _b_z_i_p_)_. I am much
indebted for their help, support and advice. See the man­
ual in the source distribution for pointers to sources of
documentation. Christian von Roques encouraged me to look
for faster sorting algorithms, so as to speed up compres­
sion. Bela Lubkin encouraged me to improve the worst-case
compression performance. The bz* scripts are derived from
those of GNU gzip. Many people sent patches, helped with
portability problems, lent machines, gave advice and were
generally helpful.
bzip2(1)

213
bzip2.c
View File

@ -3,6 +3,11 @@
/*--- A block-sorting, lossless compressor bzip2.c ---*/
/*-----------------------------------------------------------*/
/*-- Modified for use under GNO by Stephen Heumann --*/
#ifdef __ORCAC__
segment "bzip2";
#endif
/*--
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
@ -148,6 +153,14 @@
#include <ctype.h>
#include "bzlib.h"
#ifdef __appleiigs__
#include <gsos.h>
char *stristr(const char *, const char *);
#if defined(__GNO__) && defined(__STACK_CHECK__)
#include <gno/gno.h>
#endif
#endif
#define ERROR_IF_EOF(i) { if ((i) == EOF) ioError(); }
#define ERROR_IF_NOT_ZERO(i) { if ((i) != 0) ioError(); }
#define ERROR_IF_MINUS_ONE(i) { if ((i) == (-1)) ioError(); }
@ -213,6 +226,11 @@
ERROR_IF_MINUS_ONE ( retVal ); \
} while ( 0 )
# endif
/* Under GNO, replace whatever SET_BINARY_MODE was defined as above with
   a call to fsetbinary().  The macro deliberately has no trailing
   semicolon: like the `do { ... } while ( 0 )' form above, it relies on
   the call site to supply one (e.g. `SET_BINARY_MODE(stream);'), so it
   expands to exactly one statement and stays safe inside an unbraced
   if/else.  The argument is named fd, but the callers visible in this
   file pass a FILE*. */
# ifdef __GNO__
# undef SET_BINARY_MODE
# define SET_BINARY_MODE(fd) fsetbinary(fd)
# endif
#endif /* BZ_UNIX */
@ -220,7 +238,10 @@
#if BZ_LCCWIN32
# include <io.h>
# include <fcntl.h>
# include <sys\stat.h>
/* This was "#include <sys\stat.h>", but ORCA/C complains
about an invalid character, so I changed it. This
might possibly break compilation on Win 32 systems. */
# include <sys/stat.h>
# define NORETURN /**/
# define PATH_SEP '\\'
@ -253,8 +274,15 @@
typedef char Char;
typedef unsigned char Bool;
typedef unsigned char UChar;
#ifdef __ORCAC__
typedef long Int32;
typedef unsigned long UInt32;
# define Int32_FMT "%ld"
#else
typedef int Int32;
typedef unsigned int UInt32;
# define Int32_FMT "%d"
#endif /* defined __ORCAC__ */
typedef short Int16;
typedef unsigned short UInt16;
@ -386,7 +414,11 @@ static
void uInt64_toAscii ( char* outbuf, UInt64* n )
{
Int32 i, q;
#ifdef __ORCAC__
static UChar buf[32];
#else
UChar buf[32];
#endif
Int32 nBuf = 0;
UInt64 n_copy = *n;
do {
@ -416,15 +448,24 @@ Bool myfeof ( FILE* f )
/*---------------------------------------------*/
#ifndef __ORCAC__
static
void compressStream ( FILE *stream, FILE *zStream )
{
BZFILE* bzf = NULL;
#ifdef __ORCAC__
static UChar ibuf[5000];
#else
UChar ibuf[5000];
#endif
Int32 nIbuf;
UInt32 nbytes_in_lo32, nbytes_in_hi32;
UInt32 nbytes_out_lo32, nbytes_out_hi32;
#ifdef __ORCAC__
Int16 bzerr, bzerr_dummy, ret;
#else
Int32 bzerr, bzerr_dummy, ret;
#endif
SET_BINARY_MODE(stream);
SET_BINARY_MODE(zStream);
@ -513,6 +554,7 @@ void compressStream ( FILE *stream, FILE *zStream )
panic ( "compress:end" );
/*notreached*/
}
#endif
@ -521,10 +563,18 @@ static
Bool uncompressStream ( FILE *zStream, FILE *stream )
{
BZFILE* bzf = NULL;
#ifdef __ORCAC__
Int16 bzerr, bzerr_dummy;
Int32 ret, nread, streamNo, i;
static UChar obuf[5000];
static UChar unused[BZ_MAX_UNUSED];
Int16 nUnused;
#else
Int32 bzerr, bzerr_dummy, ret, nread, streamNo, i;
UChar obuf[5000];
UChar unused[BZ_MAX_UNUSED];
Int32 nUnused;
#endif
UChar* unusedTmp;
nUnused = 0;
@ -635,10 +685,18 @@ static
Bool testStream ( FILE *zStream )
{
BZFILE* bzf = NULL;
#ifdef __ORCAC__
Int16 bzerr, bzerr_dummy, ret;
Int32 nread, streamNo, i;
static UChar obuf[5000];
static UChar unused[BZ_MAX_UNUSED];
Int16 nUnused;
#else
Int32 bzerr, bzerr_dummy, ret, nread, streamNo, i;
UChar obuf[5000];
UChar unused[BZ_MAX_UNUSED];
Int32 nUnused;
#endif
UChar* unusedTmp;
nUnused = 0;
@ -802,7 +860,11 @@ void cleanUpAndFail ( Int32 ec )
"%s: `%s' may be incomplete.\n",
progName, outName );
fprintf ( stderr,
#ifndef __GNO__
"%s: I suggest doing an integrity test (bzip2 -tv)"
#else
"%s: I suggest doing an integrity test (bunzip2 -tv)"
#endif
" of it.\n",
progName );
}
@ -811,7 +873,7 @@ void cleanUpAndFail ( Int32 ec )
if (noisy && numFileNames > 0 && numFilesProcessed < numFileNames) {
fprintf ( stderr,
"%s: WARNING: some files have not been processed:\n"
"%s: %d specified on command line, %d not processed yet.\n\n",
"%s: " Int32_FMT " specified on command line, " Int32_FMT " not processed yet.\n\n",
progName, progName,
numFileNames, numFileNames - numFilesProcessed );
}
@ -827,8 +889,16 @@ void panic ( Char* s )
/* Print the internal-consistency failure described by s, prefixed with
   the program name.  The GNO build asks for port-specific bugs to be
   sent to the port's maintainer and everything else to the original
   author; other builds print only the original author's address.
   The upstream address in the GNO text previously read
   "tjseward@acm.org" (a stray 't' left over from the "\t" in the
   original message); it now matches the address used elsewhere. */
fprintf ( stderr,
"\n%s: PANIC -- internal consistency error:\n"
"\t%s\n"
#ifndef __GNO__
"\tThis is a BUG. Please report it to me at:\n"
"\tjseward@acm.org\n",
#else
"\tThis is a BUG. If you are experiencing it only in\n"
"the GNO version of bunzip2, please report it to me at\n"
"sheumann@myrealbox.com . If you can duplicate it in\n"
"other versions of bzip2 as well, please report it to\n"
"the original author Julian Seward at jseward@acm.org\n",
#endif
progName, s );
showFileNames();
cleanUpAndFail( 3 );
@ -880,6 +950,10 @@ void ioError ( void )
/*---------------------------------------------*/
#ifdef __ORCAC__
#pragma databank 1
#endif
static
void mySignalCatcher ( IntNative n )
{
@ -889,11 +963,14 @@ void mySignalCatcher ( IntNative n )
cleanUpAndFail(1);
}
/* This function should never be called on a normal GNO system,
but it doesn't hurt to leave it in. */
/*---------------------------------------------*/
static
void mySIGSEGVorSIGBUScatcher ( IntNative n )
{
#ifndef __ORCAC__
/* Not needed for decompression */
if (opMode == OM_Z)
fprintf (
stderr,
@ -915,7 +992,10 @@ void mySIGSEGVorSIGBUScatcher ( IntNative n )
" have the manual or can't be bothered to read it, mail me anyway.\n"
"\n",
progName );
else
else
#else
if (opMode != OM_Z)
#endif
fprintf (
stderr,
"\n%s: Caught a SIGSEGV or SIGBUS whilst decompressing.\n"
@ -945,6 +1025,10 @@ void mySIGSEGVorSIGBUScatcher ( IntNative n )
{ cadvise(); cleanUpAndFail( 2 ); }
}
#ifdef __ORCAC__
#pragma databank 0
#endif
/*---------------------------------------------*/
static
@ -1000,10 +1084,10 @@ void copyFileName ( Char* to, Char* from )
if ( strlen(from) > FILE_NAME_LEN-10 ) {
fprintf (
stderr,
"bzip2: file name\n`%s'\n"
"%s: file name\n`%s'\n"
"is suspiciously (more than %d chars) long.\n"
"Try using a reasonable file name instead. Sorry! :-)\n",
from, FILE_NAME_LEN-10
progName, from, FILE_NAME_LEN-10
);
setExit(1);
exit(exitValue);
@ -1137,13 +1221,43 @@ void applySavedMetaInfoToOutputFile ( Char *dstName )
retVal = chmod ( dstName, fileMetaInfo.st_mode );
ERROR_IF_NOT_ZERO ( retVal );
#ifndef __ORCAC__
/* ORCA/C's localtime(), which is called by utime(), is broken.
* We fix this by simply disabling time setting, as bzip2 does
* on non-Unix platforms anyway. A better solution would be
* to fix or replace utime() and/or localtime().
*/
retVal = utime ( dstName, &uTimBuf );
ERROR_IF_NOT_ZERO ( retVal );
#endif
#ifdef __appleiigs__
/* Set filetype to BIN if running on the GS */
{
/* GS/OS-style string that receives a copy of dstName for the
   SetFileInfo() call below. */
static GSString255 fileNameStringGS;
/* Parameter block handed to SetFileInfo(): pCount is 4, followed by
   the file name pointer, access, filetype and auxtype.
   NOTE(review): besides the filetype, the record also carries access
   0x00C3 and auxtype 0; presumably SetFileInfo() applies those as well
   -- confirm this does not undo the chmod() performed above. */
static FileInfoRecGS infoRec = { 4, /* pCount */
&fileNameStringGS, /* Ptr to file name */
0x00C3, /* access restrictions (none) */
0x06, /* filetype (BIN) */
0x0000 /* auxtype ($0000) */
};
/* Names longer than 255 characters are skipped entirely, leaving the
   file's info untouched (presumably 255 is the capacity of
   GSString255's text field -- confirm against gsos.h). */
if (strlen( dstName ) <= 255) {
/* Copy the characters and record the length separately in the
   string's length field. */
strncpy( fileNameStringGS.text, dstName, 255 );
fileNameStringGS.length = strlen( dstName );
SetFileInfo( &infoRec );
/* Ignore any errors produced by this call, leaving the file's
existing filetype intact. This parallels the approach taken
when setting file attributes on Unix.
*/
}
}
#else
/* Not building for the Apple IIgs: try to restore the saved owner and
   group instead. */
retVal = chown ( dstName, fileMetaInfo.st_uid, fileMetaInfo.st_gid );
/* chown() will in many cases return with EPERM, which can
be safely ignored.
*/
#endif /* defined __appleiigs__ */
# endif
}
@ -1181,8 +1295,14 @@ Bool hasSuffix ( Char* s, Char* suffix )
{
Int32 ns = strlen(s);
Int32 nx = strlen(suffix);
if (ns < nx) return False;
if (ns < nx) return False;
#ifndef __appleiigs__
if (strcmp(s + ns - nx, suffix) == 0) return True;
#else
/* Filenames are case-insensitive on the GS,
so use a case-insensitive compare for them */
if (strcasecmp(s + ns - nx, suffix) == 0) return True;
#endif
return False;
}
@ -1198,6 +1318,7 @@ Bool mapSuffix ( Char* name,
/*---------------------------------------------*/
#ifndef __ORCAC__
static
void compress ( Char *name )
{
@ -1279,7 +1400,7 @@ void compress ( Char *name )
}
if ( srcMode == SM_F2F && !forceOverwrite &&
(n=countHardLinks ( inName )) > 0) {
fprintf ( stderr, "%s: Input file %s has %d other link%s.\n",
fprintf ( stderr, "%s: Input file %s has " Int32_FMT " other link%s.\n",
progName, inName, n, n > 1 ? "s" : "" );
setExit(1);
return;
@ -1376,6 +1497,7 @@ void compress ( Char *name )
deleteOutputOnInterrupt = False;
}
#endif
/*---------------------------------------------*/
@ -1465,7 +1587,7 @@ void uncompress ( Char *name )
}
if ( srcMode == SM_F2F && !forceOverwrite &&
(n=countHardLinks ( inName ) ) > 0) {
fprintf ( stderr, "%s: Input file %s has %d other link%s.\n",
fprintf ( stderr, "%s: Input file %s has " Int32_FMT " other link%s.\n",
progName, inName, n, n > 1 ? "s" : "" );
setExit(1);
return;
@ -1671,7 +1793,11 @@ void license ( void )
{
fprintf ( stderr,
#ifndef __ORCAC__
"bzip2, a block-sorting file compressor. "
#else
"bunzip2, a block-sorting file decompressor. "
#endif
"Version %s.\n"
" \n"
" Copyright (C) 1996-2002 by Julian Seward.\n"
@ -1684,6 +1810,14 @@ void license ( void )
" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
" LICENSE file for more details.\n"
#ifdef __ORCAC__
" \n"
" This version of bunzip2 for GNO is based on Julian Seward's bzip2\n"
" program for other platforms, with modifications by Stephen Heumann.\n"
" \n"
" This program contains material from the ORCA/C Run-Time Libraries,\n"
" copyright 1987-1996 by Byte Works, Inc. Used with permission.\n"
#endif
" \n",
BZ2_bzlibVersion()
);
@ -1696,13 +1830,19 @@ void usage ( Char *fullProgName )
{
fprintf (
stderr,
#ifndef __ORCAC__
"bzip2, a block-sorting file compressor. "
#else
"bunzip2, a block-sorting file decompressor. "
#endif
"Version %s.\n"
"\n usage: %s [flags and input files in any order]\n"
"\n"
" -h --help print this message\n"
" -d --decompress force decompression\n"
#ifndef __ORCAC__
" -z --compress force compression\n"
#endif
" -k --keep keep (don't delete) input files\n"
" -f --force overwrite existing output files\n"
" -t --test test compressed file integrity\n"
@ -1712,17 +1852,33 @@ void usage ( Char *fullProgName )
" -L --license display software version & license\n"
" -V --version display software version & license\n"
" -s --small use less memory (at most 2500k)\n"
#ifndef __ORCAC__
" -1 .. -9 set block size to 100k .. 900k\n"
" --fast alias for -1\n"
" --best alias for -9\n"
#endif
"\n"
#ifndef __ORCAC__
" If invoked as `bzip2', default action is to compress.\n"
" as `bunzip2', default action is to decompress.\n"
#else
" If invoked as 'bunzip2', default action is to decompress.\n"
#endif
" as `bzcat', default action is to decompress to stdout.\n"
"\n"
#ifndef __ORCAC__
" If no file names are given, bzip2 compresses or decompresses\n"
" from standard input to standard output. You can combine\n"
" short flags, so `-v -4' means the same as -v4 or -4v, &c.\n"
#else
" If no file names are given, bunzip2 decompresses from standard\n"
" input to standard output. You can combine short flags, so\n"
" `-v -4' means the same as -v4 or -4v, &c.\n"
"\n"
" This version of bunzip2 for GNO is based on the bzip2 program for\n"
" other platforms; however, it has all compression functionality\n"
" disabled and will only decompress or test compressed files.\n"
#endif
# if BZ_UNIX
"\n"
# endif
@ -1794,6 +1950,10 @@ Cell *mkCell ( void )
/*---------------------------------------------*/
#ifdef __ORCAC__
#pragma optimize 119
#endif
static
Cell *snocString ( Cell *root, Char *name )
{
@ -1810,6 +1970,10 @@ Cell *snocString ( Cell *root, Char *name )
}
}
#ifdef __ORCAC__
#pragma optimize -1
#endif
/*---------------------------------------------*/
static
@ -1850,6 +2014,11 @@ IntNative main ( IntNative argc, Char *argv[] )
Cell *aa;
Bool decode;
#if defined(__GNO__) && defined(__STACK_CHECK__)
__REPORT_STACK();
fprintf(stderr, "Stack checking on\n");
#endif
/*-- Be really really really paranoid :-) --*/
if (sizeof(Int32) != 4 || sizeof(UInt32) != 4 ||
sizeof(Int16) != 2 || sizeof(UInt16) != 2 ||
@ -1920,6 +2089,7 @@ IntNative main ( IntNative argc, Char *argv[] )
/*-- Determine what to do (compress/uncompress/test/cat). --*/
/*-- Note that subsequent flag handling may change this. --*/
#ifndef __ORCAC__
opMode = OM_Z;
if ( (strstr ( progName, "unzip" ) != 0) ||
@ -1934,6 +2104,23 @@ IntNative main ( IntNative argc, Char *argv[] )
srcMode = (numFileNames == 0) ? SM_I2O : SM_F2O;
}
#else
/* GNO modifications: Decompress by default, and use case-insensitive
compares for filenames, in keeping with the normal practice on the GS */
opMode = OM_UNZ;
if (stristr ( progName, "bzip" ) != 0)
opMode = OM_Z;
if (stristr ( progName, "unzip" ) != 0)
opMode = OM_UNZ;
if ( (stristr ( progName, "z2cat" ) != 0) ||
(stristr ( progName, "zcat" ) != 0) ) {
opMode = OM_UNZ;
srcMode = (numFileNames == 0) ? SM_I2O : SM_F2O;
}
#endif
/*-- Look at the flags. --*/
for (aa = argList; aa != NULL; aa = aa->link) {
@ -2026,6 +2213,7 @@ IntNative main ( IntNative argc, Char *argv[] )
}
if (opMode == OM_Z) {
#ifndef __ORCAC__
if (srcMode == SM_I2O) {
compress ( NULL );
} else {
@ -2037,6 +2225,13 @@ IntNative main ( IntNative argc, Char *argv[] )
compress ( aa->name );
}
}
#else
fprintf ( stderr,
"%s: Cannot compress data. The GNO version of bunzip2 does\n"
"%s: not support compression, only decompression and testing.\n",
progName, progName );
cleanUpAndFail( 4 );
#endif
}
else

390
bzip2.txt
View File

@ -1,390 +0,0 @@
NAME
bzip2, bunzip2 - a block-sorting file compressor, v1.0.2
bzcat - decompresses files to stdout
bzip2recover - recovers data from damaged bzip2 files
SYNOPSIS
bzip2 [ -cdfkqstvzVL123456789 ] [ filenames ... ]
bunzip2 [ -fkvsVL ] [ filenames ... ]
bzcat [ -s ] [ filenames ... ]
bzip2recover filename
DESCRIPTION
bzip2 compresses files using the Burrows-Wheeler block
sorting text compression algorithm, and Huffman coding.
Compression is generally considerably better than that
achieved by more conventional LZ77/LZ78-based compressors,
and approaches the performance of the PPM family of sta­
tistical compressors.
The command-line options are deliberately very similar to
those of GNU gzip, but they are not identical.
bzip2 expects a list of file names to accompany the com­
mand-line flags. Each file is replaced by a compressed
version of itself, with the name "original_name.bz2".
Each compressed file has the same modification date, per­
missions, and, when possible, ownership as the correspond­
ing original, so that these properties can be correctly
restored at decompression time. File name handling is
naive in the sense that there is no mechanism for preserv­
ing original file names, permissions, ownerships or dates
in filesystems which lack these concepts, or have serious
file name length restrictions, such as MS-DOS.
bzip2 and bunzip2 will by default not overwrite existing
files. If you want this to happen, specify the -f flag.
If no file names are specified, bzip2 compresses from
standard input to standard output. In this case, bzip2
will decline to write compressed output to a terminal, as
this would be entirely incomprehensible and therefore
pointless.
bunzip2 (or bzip2 -d) decompresses all specified files.
Files which were not created by bzip2 will be detected and
ignored, and a warning issued. bzip2 attempts to guess
the filename for the decompressed file from that of the
compressed file as follows:
filename.bz2 becomes filename
filename.bz becomes filename
filename.tbz2 becomes filename.tar
filename.tbz becomes filename.tar
anyothername becomes anyothername.out
If the file does not end in one of the recognised endings,
.bz2, .bz, .tbz2 or .tbz, bzip2 complains that it cannot
guess the name of the original file, and uses the original
name with .out appended.
As with compression, supplying no filenames causes decom­
pression from standard input to standard output.
bunzip2 will correctly decompress a file which is the con­
catenation of two or more compressed files. The result is
the concatenation of the corresponding uncompressed files.
Integrity testing (-t) of concatenated compressed files is
also supported.
You can also compress or decompress files to the standard
output by giving the -c flag. Multiple files may be com­
pressed and decompressed like this. The resulting outputs
are fed sequentially to stdout. Compression of multiple
files in this manner generates a stream containing multi­
ple compressed file representations. Such a stream can be
decompressed correctly only by bzip2 version 0.9.0 or
later. Earlier versions of bzip2 will stop after decom­
pressing the first file in the stream.
bzcat (or bzip2 -dc) decompresses all specified files to
the standard output.
bzip2 will read arguments from the environment variables
BZIP2 and BZIP, in that order, and will process them
before any arguments read from the command line. This
gives a convenient way to supply default arguments.
Compression is always performed, even if the compressed
file is slightly larger than the original. Files of less
than about one hundred bytes tend to get larger, since the
compression mechanism has a constant overhead in the
region of 50 bytes. Random data (including the output of
most file compressors) is coded at about 8.05 bits per
byte, giving an expansion of around 0.5%.
As a self-check for your protection, bzip2 uses 32-bit
CRCs to make sure that the decompressed version of a file
is identical to the original. This guards against corrup­
tion of the compressed data, and against undetected bugs
in bzip2 (hopefully very unlikely). The chance of data
corruption going undetected is microscopic, about one
chance in four billion for each file processed. Be aware,
though, that the check occurs upon decompression, so it
can only tell you that something is wrong. It can't help
you recover the original uncompressed data. You can use
bzip2recover to try to recover data from damaged files.
Return values: 0 for a normal exit, 1 for environmental
problems (file not found, invalid flags, I/O errors, &c),
2 to indicate a corrupt compressed file, 3 for an internal
consistency error (eg, bug) which caused bzip2 to panic.
OPTIONS
-c --stdout
Compress or decompress to standard output.
-d --decompress
Force decompression. bzip2, bunzip2 and bzcat are
really the same program, and the decision about
what actions to take is done on the basis of which
name is used. This flag overrides that mechanism,
and forces bzip2 to decompress.
-z --compress
The complement to -d: forces compression,
regardless of the invocation name.
-t --test
Check integrity of the specified file(s), but don't
decompress them. This really performs a trial
decompression and throws away the result.
-f --force
Force overwrite of output files. Normally, bzip2
will not overwrite existing output files. Also
forces bzip2 to break hard links to files, which it
otherwise wouldn't do.
bzip2 normally declines to decompress files which
don't have the correct magic header bytes. If
forced (-f), however, it will pass such files
through unmodified. This is how GNU gzip behaves.
-k --keep
Keep (don't delete) input files during compression
or decompression.
-s --small
Reduce memory usage, for compression, decompression
and testing. Files are decompressed and tested
using a modified algorithm which only requires 2.5
bytes per block byte. This means any file can be
decompressed in 2300k of memory, albeit at about
half the normal speed.
During compression, -s selects a block size of
200k, which limits memory use to around the same
figure, at the expense of your compression ratio.
In short, if your machine is low on memory (8
megabytes or less), use -s for everything. See
MEMORY MANAGEMENT below.
-q --quiet
Suppress non-essential warning messages. Messages
pertaining to I/O errors and other critical events
will not be suppressed.
-v --verbose
Verbose mode -- show the compression ratio for each
file processed. Further -v's increase the ver­
bosity level, spewing out lots of information which
is primarily of interest for diagnostic purposes.
-L --license -V --version
Display the software version, license terms and
conditions.
-1 (or --fast) to -9 (or --best)
Set the block size to 100 k, 200 k .. 900 k when
compressing. Has no effect when decompressing.
See MEMORY MANAGEMENT below. The --fast and --best
aliases are primarily for GNU gzip compatibility.
In particular, --fast doesn't make things signifi­
cantly faster. And --best merely selects the
default behaviour.
-- Treats all subsequent arguments as file names, even
if they start with a dash. This is so you can han­
dle files with names beginning with a dash, for
example: bzip2 -- -myfilename.
--repetitive-fast --repetitive-best
These flags are redundant in versions 0.9.5 and
above. They provided some coarse control over the
behaviour of the sorting algorithm in earlier ver­
sions, which was sometimes useful. 0.9.5 and above
have an improved algorithm which renders these
flags irrelevant.
MEMORY MANAGEMENT
bzip2 compresses large files in blocks. The block size
affects both the compression ratio achieved, and the
amount of memory needed for compression and decompression.
The flags -1 through -9 specify the block size to be
100,000 bytes through 900,000 bytes (the default) respec­
tively. At decompression time, the block size used for
compression is read from the header of the compressed
file, and bunzip2 then allocates itself just enough memory
to decompress the file. Since block sizes are stored in
compressed files, it follows that the flags -1 to -9 are
irrelevant to and so ignored during decompression.
Compression and decompression requirements, in bytes, can
be estimated as:
Compression: 400k + ( 8 x block size )
Decompression: 100k + ( 4 x block size ), or
100k + ( 2.5 x block size )
Larger block sizes give rapidly diminishing marginal
returns. Most of the compression comes from the first two
or three hundred k of block size, a fact worth bearing in
mind when using bzip2 on small machines. It is also
important to appreciate that the decompression memory
requirement is set at compression time by the choice of
block size.
For files compressed with the default 900k block size,
bunzip2 will require about 3700 kbytes to decompress. To
support decompression of any file on a 4 megabyte machine,
bunzip2 has an option to decompress using approximately
half this amount of memory, about 2300 kbytes. Decompres­
sion speed is also halved, so you should use this option
only where necessary. The relevant flag is -s.
In general, try and use the largest block size memory con­
straints allow, since that maximises the compression
achieved. Compression and decompression speed are virtu­
ally unaffected by block size.
Another significant point applies to files which fit in a
single block -- that means most files you'd encounter
using a large block size. The amount of real memory
touched is proportional to the size of the file, since the
file is smaller than a block. For example, compressing a
file 20,000 bytes long with the flag -9 will cause the
compressor to allocate around 7600k of memory, but only
touch 400k + 20000 * 8 = 560 kbytes of it. Similarly, the
decompressor will allocate 3700k but only touch 100k +
20000 * 4 = 180 kbytes.
Here is a table which summarises the maximum memory usage
for different block sizes. Also recorded is the total
compressed size for 14 files of the Calgary Text Compres­
sion Corpus totalling 3,141,622 bytes. This column gives
some feel for how compression varies with block size.
These figures tend to understate the advantage of larger
block sizes for larger files, since the Corpus is domi­
nated by smaller files.
Compress Decompress Decompress Corpus
Flag usage usage -s usage Size
-1 1200k 500k 350k 914704
-2 2000k 900k 600k 877703
-3 2800k 1300k 850k 860338
-4 3600k 1700k 1100k 846899
-5 4400k 2100k 1350k 845160
-6 5200k 2500k 1600k 838626
-7 6100k 2900k 1850k 834096
-7 6000k 2900k 1850k 834096
-9 7600k 3700k 2350k 828642
RECOVERING DATA FROM DAMAGED FILES
bzip2 compresses files in blocks, usually 900kbytes long.
Each block is handled independently. If a media or trans­
mission error causes a multi-block .bz2 file to become
damaged, it may be possible to recover data from the
undamaged blocks in the file.
The compressed representation of each block is delimited
by a 48-bit pattern, which makes it possible to find the
block boundaries with reasonable certainty. Each block
also carries its own 32-bit CRC, so damaged blocks can be
distinguished from undamaged ones.
bzip2recover is a simple program whose purpose is to
search for blocks in .bz2 files, and write each block out
into its own .bz2 file. You can then use bzip2 -t to test
the integrity of the resulting files, and decompress those
which are undamaged.
bzip2recover takes a single argument, the name of the dam­
aged file, and writes a number of files
"rec00001file.bz2", "rec00002file.bz2", etc, containing
the extracted blocks. The output filenames are
designed so that the use of wildcards in subsequent pro­
cessing -- for example, "bzip2 -dc rec*file.bz2 > recov­
ered_data" -- processes the files in the correct order.
bzip2recover should be of most use dealing with large .bz2
files, as these will contain many blocks. It is clearly
futile to use it on damaged single-block files, since a
damaged block cannot be recovered. If you wish to min­
imise any potential data loss through media or transmis­
sion errors, you might consider compressing with a smaller
block size.
PERFORMANCE NOTES
The sorting phase of compression gathers together similar
strings in the file. Because of this, files containing
very long runs of repeated symbols, like "aabaabaabaab
..." (repeated several hundred times) may compress more
slowly than normal. Versions 0.9.5 and above fare much
better than previous versions in this respect. The ratio
between worst-case and average-case compression time is in
the region of 10:1. For previous versions, this figure
was more like 100:1. You can use the -vvvv option to mon­
itor progress in great detail, if you want.
Decompression speed is unaffected by these phenomena.
bzip2 usually allocates several megabytes of memory to
operate in, and then charges all over it in a fairly ran­
dom fashion. This means that performance, both for com­
pressing and decompressing, is largely determined by the
speed at which your machine can service cache misses.
Because of this, small changes to the code to reduce the
miss rate have been observed to give disproportionately
large performance improvements. I imagine bzip2 will per­
form best on machines with very large caches.
CAVEATS
I/O error messages are not as helpful as they could be.
bzip2 tries hard to detect I/O errors and exit cleanly,
but the details of what the problem is sometimes seem
rather misleading.
This manual page pertains to version 1.0.2 of bzip2. Com­
pressed data created by this version is entirely forwards
and backwards compatible with the previous public
releases, versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1,
but with the following exception: 0.9.0 and above can cor­
rectly decompress multiple concatenated compressed files.
0.1pl2 cannot do this; it will stop after decompressing
just the first file in the stream.
bzip2recover versions prior to this one, 1.0.2, used
32-bit integers to represent bit positions in compressed
files, so they could not handle compressed files more than
512 megabytes long. Versions 1.0.2 and above use 64-bit
ints on some platforms which support them (GNU supported
targets, and Windows). To establish whether or not
bzip2recover was built with such a limitation, run it
without arguments. In any event you can build yourself an
unlimited version if you can recompile it with MaybeUInt64
set to be an unsigned 64-bit integer.
AUTHOR
Julian Seward, jseward@acm.org.
http://sources.redhat.com/bzip2
The ideas embodied in bzip2 are due to (at least) the fol­
lowing people: Michael Burrows and David Wheeler (for the
block sorting transformation), David Wheeler (again, for
the Huffman coder), Peter Fenwick (for the structured cod­
ing model in the original bzip, and many refinements), and
Alistair Moffat, Radford Neal and Ian Witten (for the
arithmetic coder in the original bzip). I am much
indebted for their help, support and advice. See the man­
ual in the source distribution for pointers to sources of
documentation. Christian von Roques encouraged me to look
for faster sorting algorithms, so as to speed up compres­
sion. Bela Lubkin encouraged me to improve the worst-case
compression performance. The bz* scripts are derived from
those of GNU gzip. Many people sent patches, helped with
portability problems, lent machines, gave advice and were
generally helpful.

1
bzip2recover.1 Normal file
View File

@ -0,0 +1 @@
.so man1/bunzip2.1

View File

@ -4,6 +4,8 @@
/*--- bzip2recover.c ---*/
/*-----------------------------------------------------------*/
/*-- Modified for use under GNO by Stephen Heumann --*/
/*--
This program is bzip2recover, a program to attempt data
salvage from damaged files created by the accompanying
@ -56,7 +58,14 @@
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#ifdef __appleiigs__
#include <gsos.h>
#if defined(__GNO__) && defined(__STACK_CHECK__)
#include <gno/gno.h>
#endif
#endif
/* This program records bit locations in the file to be recovered.
That means that if 64-bit ints are not supported, we will not
@ -74,14 +83,28 @@
#ifdef _MSC_VER
typedef unsigned __int64 MaybeUInt64;
# define MaybeUInt64_FMT "%I64u"
#else
#ifdef __ORCAC__
typedef unsigned long MaybeUInt64;
# define MaybeUInt64_FMT "%lu"
#else
typedef unsigned int MaybeUInt64;
# define MaybeUInt64_FMT "%u"
#endif
#endif
#endif
typedef unsigned int UInt32;
typedef int Int32;
#ifdef __ORCAC__
typedef unsigned long UInt32;
typedef long Int32;
# define Int32_FMT "%ld"
# define size_t_FMT "%lu"
#else
typedef unsigned int UInt32;
typedef int Int32;
# define Int32_FMT "%d"
# define size_t_FMT "%d"
#endif /* defined __ORCAC__ */
typedef unsigned char UChar;
typedef char Char;
typedef unsigned char Bool;
@ -143,7 +166,7 @@ void writeError ( void )
void mallocFail ( Int32 n )
{
fprintf ( stderr,
"%s: malloc failed on request for %d bytes.\n",
"%s: malloc failed on request for " Int32_FMT " bytes.\n",
progName, n );
fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
progName );
@ -155,7 +178,7 @@ void mallocFail ( Int32 n )
void tooManyBlocks ( Int32 max_handled_blocks )
{
fprintf ( stderr,
"%s: `%s' appears to contain more than %d blocks\n",
"%s: `%s' appears to contain more than " Int32_FMT " blocks\n",
progName, inFileName, max_handled_blocks );
fprintf ( stderr,
"%s: and cannot be handled. To fix, increase\n",
@ -296,8 +319,13 @@ Bool endsInBz2 ( Char* name )
if (n <= 4) return False;
/* True when the last four characters of name are ".bz2".  The GNO build
   accepts either case for the two letters; other builds require lower
   case.  Each letter test must index the same position for both cases. */
return
   (name[n-4] == '.' &&
#ifdef __GNO__
   (name[n-3] == 'b' || name[n-3] == 'B') &&
   (name[n-2] == 'z' || name[n-2] == 'Z') &&
#else
   name[n-3] == 'b' &&
   name[n-2] == 'z' &&
#endif
   name[n-1] == '2');
}
@ -313,6 +341,10 @@ Bool endsInBz2 ( Char* name )
# define BZ_SPLIT_SYM '/' /* path splitter on Unix platform */
#endif
#ifdef __appleiigs__
# define BZ_SPLIT_SYM_GS ':' /* possible path splitter on GS/OS */
#endif
#define BLOCK_HEADER_HI 0x00003141UL
#define BLOCK_HEADER_LO 0x59265359UL
@ -323,14 +355,28 @@ Bool endsInBz2 ( Char* name )
would have an uncompressed size of at least 40GB, so the chances
are low you'll need to up this.
*/
/* STH - Values larger than 5369 (actually a bit less than that)
are useless when MaybeUInt64 is 32 bits.
*/
#ifdef __ORCAC__
#define BZ_MAX_HANDLED_BLOCKS 5369
#else
#define BZ_MAX_HANDLED_BLOCKS 50000
#endif
/* Tables of block start/end bit positions found in the input file.
   Builds other than ORCA/C use fixed-size static arrays; under ORCA/C
   these are only pointers, and main() allocates the storage with
   malloc() before the tables are used. */
#ifndef __ORCAC__
MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS];
#else /* defined __ORCAC__ */
MaybeUInt64 *bStart;
MaybeUInt64 *bEnd;
MaybeUInt64 *rbStart;
MaybeUInt64 *rbEnd;
#endif
Int32 main ( Int32 argc, Char** argv )
int main ( int argc, Char** argv )
{
FILE* inFile;
FILE* outFile;
@ -341,11 +387,19 @@ Int32 main ( Int32 argc, Char** argv )
UInt32 buffHi, buffLo, blockCRC;
Char* p;
#if defined(__GNO__) && defined(__STACK_CHECK__)
__REPORT_STACK();
#endif
strcpy ( progName, argv[0] );
inFileName[0] = outFileName[0] = 0;
fprintf ( stderr,
fprintf ( stderr,
#ifdef __GNO__
"bzip2recover 1.0.2gs1: extracts blocks from damaged .bz2 files.\n" );
#else
"bzip2recover 1.0.2: extracts blocks from damaged .bz2 files.\n" );
#endif
if (argc != 2) {
fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
@ -358,9 +412,11 @@ Int32 main ( Int32 argc, Char** argv )
case 4:
fprintf(stderr,
"\trestrictions on size of recovered file: 512 MB\n");
#ifndef __ORCAC__
fprintf(stderr,
"\tto circumvent, recompile with MaybeUInt64 as an\n"
"\tunsigned 64-bit int.\n");
#endif
break;
default:
fprintf(stderr,
@ -373,7 +429,7 @@ Int32 main ( Int32 argc, Char** argv )
if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
fprintf ( stderr,
"%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n",
"%s: supplied filename is suspiciously (>= " size_t_FMT " chars) long. Bye!\n",
progName, strlen(argv[1]) );
exit(1);
}
@ -386,6 +442,21 @@ Int32 main ( Int32 argc, Char** argv )
exit(1);
}
/* Allocate big arrays dynamically so we can use small memory model. These aren't
explicitly free()'d anywhere, but exist for the duration of the program. */
#ifdef __ORCAC__
bStart = malloc(BZ_MAX_HANDLED_BLOCKS * sizeof(MaybeUInt64));
bEnd = malloc(BZ_MAX_HANDLED_BLOCKS * sizeof(MaybeUInt64));
rbStart = malloc(BZ_MAX_HANDLED_BLOCKS * sizeof(MaybeUInt64));
rbEnd = malloc(BZ_MAX_HANDLED_BLOCKS * sizeof(MaybeUInt64));
if ((bStart == NULL) || (bEnd == NULL) ||
(rbStart == NULL) || (rbEnd == NULL)) {
fprintf ( stderr, "%s: couldn't allocate enough memory\n", progName );
exit(1);
}
#endif
bsIn = bsOpenReadStream ( inFile );
fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
@ -404,7 +475,7 @@ Int32 main ( Int32 argc, Char** argv )
(bitsRead - bStart[currBlock]) >= 40) {
bEnd[currBlock] = bitsRead-1;
if (currBlock > 0)
fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
fprintf ( stderr, " block " Int32_FMT " runs from " MaybeUInt64_FMT
" to " MaybeUInt64_FMT " (incomplete)\n",
currBlock, bStart[currBlock], bEnd[currBlock] );
} else
@ -426,7 +497,7 @@ Int32 main ( Int32 argc, Char** argv )
}
if (currBlock > 0 &&
(bEnd[currBlock] - bStart[currBlock]) >= 130) {
fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
fprintf ( stderr, " block " Int32_FMT " runs from " MaybeUInt64_FMT
" to " MaybeUInt64_FMT "\n",
rbCtr+1, bStart[currBlock], bEnd[currBlock] );
rbStart[rbCtr] = bStart[currBlock];
@ -496,26 +567,41 @@ Int32 main ( Int32 argc, Char** argv )
if (bitsRead == rbStart[wrBlock]) {
/* Create the output file name, correctly handling leading paths.
(31.10.2001 by Sergey E. Kusikov) */
/* Modified by STH to make it work better on GNO. It would still
be confused by files with a '/' character in their names. */
Char* split;
Int32 ofs, k;
for (k = 0; k < BZ_MAX_FILENAME; k++)
outFileName[k] = 0;
strcpy (outFileName, inFileName);
#ifdef __appleiigs__
{
   /* A GS/OS path may use either ':' or '/' as its separator, so take
      whichever separator occurs last in the name.  The NULL cases are
      tested explicitly: a relational comparison involving a null pointer
      is undefined in C, and this also searches the string only once per
      separator. */
   Char* splitGS = strrchr (outFileName, BZ_SPLIT_SYM_GS);
   split = strrchr (outFileName, BZ_SPLIT_SYM);
   if (splitGS != NULL && (split == NULL || splitGS > split))
      split = splitGS;
}
#else
split = strrchr (outFileName, BZ_SPLIT_SYM);
#endif /* defined __appleiigs__ */
if (split == NULL) {
split = outFileName;
} else {
++split;
}
/* Now split points to the start of the basename. */
ofs = split - outFileName;
ofs = split - outFileName;
/* On GS, max. block number is < 6000, and ProDOS filenames are short,
so use only four digits in output file name */
#ifdef __ORCAC__
sprintf (split, "rec%4ld", wrBlock+1);
#else
sprintf (split, "rec%5d", wrBlock+1);
#endif
for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
strcat (outFileName, inFileName + ofs);
if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
fprintf ( stderr, " writing block %d to `%s' ...\n",
fprintf ( stderr, " writing block " Int32_FMT " to `%s' ...\n",
wrBlock+1, outFileName );
outFile = fopen ( outFileName, "wb" );
@ -524,6 +610,27 @@ Int32 main ( Int32 argc, Char** argv )
progName, outFileName );
exit(1);
}
#ifdef __appleiigs__
/* On the GS, mark the freshly created output file as type BIN.  This is
   best effort: names longer than 255 characters are left alone and any
   error from SetFileInfo is ignored. */
{
   static GSString255 gsName;
   static FileInfoRecGS fileInfo = {
      4,          /* pCount */
      &gsName,    /* Ptr to file name */
      0x00C3,     /* access restrictions (none) */
      0x06,       /* filetype (BIN) */
      0x0000      /* auxtype ($0000) */
   };
   size_t nameLen = strlen( outFileName );

   if (nameLen <= 255) {
      strncpy( gsName.text, outFileName, 255 );
      gsName.length = nameLen;
      SetFileInfo( &fileInfo );
   }
}
#endif
bsWr = bsOpenWriteStream ( outFile );
bsPutUChar ( bsWr, BZ_HDR_B );
bsPutUChar ( bsWr, BZ_HDR_Z );
@ -535,7 +642,7 @@ Int32 main ( Int32 argc, Char** argv )
}
}
fprintf ( stderr, "%s: finished\n", progName );
fprintf ( stderr, "%s: finished\n", progName );
return 0;
}

15
bzip2recover.rez Normal file
View File

@ -0,0 +1,15 @@
#include "/lang/orca/libraries/rinclude/Types.Rez"
resource rVersion (0x1, purgeable3, nocrossbank) {
{ 1, 0, 2, /* version 1.0.2 */
release, /* development|alpha|beta|final|release */
0 /* non-final release number */
},
verUS, /* country code -- only some are avail */
"bzip2recover", /* name */
/* _Very_ brief description. Check "file info" */
/* shown in the Finder to see if it's too long */
/* Note that \n is used to separate lines here. */
"Bzip2 archive recovery program"
};

173
bzlib.c
View File

@ -4,6 +4,11 @@
/*--- bzlib.c ---*/
/*-------------------------------------------------------------*/
/*-- Modified for use under GNO by Stephen Heumann --*/
#ifdef __ORCAC__
segment "bzip2";
#endif
/*--
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
@ -85,7 +90,18 @@
#ifndef BZ_NO_STDIO
void BZ2_bz__AssertH__fail ( int errcode )
{
fprintf(stderr,
fprintf(stderr,
#ifdef __GNO__
/* GNO wording of the internal-error report.  All of the literals below
   must concatenate into ONE format string, which consumes errcode (%d)
   and the library version (%s); only the last literal may be followed
   by a comma, otherwise the following text is passed as an argument. */
"\n\nbunzip2/libbzip2: internal error number %d.\n"
"This is a bug in bunzip2/libbzip2, %s.\n"
"If you are experiencing it only in the GNO version of bunzip2,\n"
"please report it to me at sheumann@myrealbox.com . If you can\n"
"duplicate it in other versions of bzip2 as well, please report\n"
"it to the original author Julian Seward at jseward@acm.org .\n"
"Please make an effort to report this bug; timely and accurate\n"
"bug reports eventually lead to higher quality software. Thanks.\n"
"Stephen Heumann and Julian Seward.\n\n",
#else
"\n\nbzip2/libbzip2: internal error number %d.\n"
"This is a bug in bzip2/libbzip2, %s.\n"
"Please report it to me at: jseward@acm.org. If this happened\n"
@ -94,10 +110,15 @@ void BZ2_bz__AssertH__fail ( int errcode )
"of that program. Please make an effort to report this bug;\n"
"timely and accurate bug reports eventually lead to higher\n"
"quality software. Thanks. Julian Seward, 30 December 2001.\n\n",
#endif
errcode,
BZ2_bzlibVersion()
);
#ifndef __ORCAC__
/* Don't need this for decompression, since error 1007 is only
* produced in the blocksort routines used for compression.
*/
if (errcode == 1007) {
fprintf(stderr,
"\n*** A special note about internal error number 1007 ***\n"
@ -125,6 +146,7 @@ void BZ2_bz__AssertH__fail ( int errcode )
"\n"
);
}
#endif
exit(3);
}
@ -135,9 +157,11 @@ void BZ2_bz__AssertH__fail ( int errcode )
static
int bz_config_ok ( void )
{
#ifndef __ORCAC__
if (sizeof(int) != 4) return 0;
if (sizeof(short) != 2) return 0;
if (sizeof(char) != 1) return 0;
#endif
return 1;
}
@ -158,6 +182,7 @@ void default_bzfree ( void* opaque, void* addr )
/*---------------------------------------------------*/
#ifndef __ORCAC__
static
void prepare_new_block ( EState* s )
{
@ -169,17 +194,21 @@ void prepare_new_block ( EState* s )
for (i = 0; i < 256; i++) s->inUse[i] = False;
s->blockNo++;
}
#endif
/*---------------------------------------------------*/
#ifndef __ORCAC__
static
void init_RL ( EState* s )
{
s->state_in_ch = 256;
s->state_in_len = 0;
}
#endif
#ifndef __ORCAC__
static
Bool isempty_RL ( EState* s )
{
@ -187,9 +216,11 @@ Bool isempty_RL ( EState* s )
return False; else
return True;
}
#endif
/*---------------------------------------------------*/
#ifndef __ORCAC__
int BZ_API(BZ2_bzCompressInit)
( bz_stream* strm,
int blockSize100k,
@ -254,9 +285,11 @@ int BZ_API(BZ2_bzCompressInit)
prepare_new_block ( s );
return BZ_OK;
}
#endif
/*---------------------------------------------------*/
#ifndef __ORCAC__
static
void add_pair_to_block ( EState* s )
{
@ -267,6 +300,29 @@ void add_pair_to_block ( EState* s )
}
s->inUse[s->state_in_ch] = True;
switch (s->state_in_len) {
#ifdef __ORCAC__
case 1:
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
break;
case 2:
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
break;
case 3:
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
break;
default:
*((s->inUse)+(s->state_in_len-4)) = True;
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
*((s->block)+(s->nblock)) = ((UChar)(s->state_in_len-4));
s->nblock++;
break;
#else
case 1:
s->block[s->nblock] = (UChar)ch; s->nblock++;
break;
@ -288,20 +344,51 @@ void add_pair_to_block ( EState* s )
s->block[s->nblock] = ((UChar)(s->state_in_len-4));
s->nblock++;
break;
#endif
}
}
#endif
/*---------------------------------------------------*/
#ifndef __ORCAC__
static
void flush_RL ( EState* s )
{
if (s->state_in_ch < 256) add_pair_to_block ( s );
init_RL ( s );
}
#endif
/*---------------------------------------------------*/
#ifdef __ORCAC__
#define ADD_CHAR_TO_BLOCK(zs,zchh0) \
{ \
UInt32 zchh = (UInt32)(zchh0); \
/*-- fast track the common case --*/ \
if (zchh != zs->state_in_ch && \
zs->state_in_len == 1) { \
UChar ch = (UChar)(zs->state_in_ch); \
BZ_UPDATE_CRC( zs->blockCRC, ch ); \
*((zs->inUse)+(zs->state_in_ch)) = True; \
*((zs->block)+(zs->nblock)) = (UChar)ch; \
zs->nblock++; \
zs->state_in_ch = zchh; \
} \
else \
/*-- general, uncommon cases --*/ \
if (zchh != zs->state_in_ch || \
zs->state_in_len == 255) { \
if (zs->state_in_ch < 256) \
add_pair_to_block ( zs ); \
zs->state_in_ch = zchh; \
zs->state_in_len = 1; \
} else { \
zs->state_in_len++; \
} \
}
#else
#define ADD_CHAR_TO_BLOCK(zs,zchh0) \
{ \
UInt32 zchh = (UInt32)(zchh0); \
@ -327,9 +414,11 @@ void flush_RL ( EState* s )
zs->state_in_len++; \
} \
}
#endif
/*---------------------------------------------------*/
#ifndef __ORCAC__
static
Bool copy_input_until_stop ( EState* s )
{
@ -372,9 +461,11 @@ Bool copy_input_until_stop ( EState* s )
}
return progress_in;
}
#endif
/*---------------------------------------------------*/
#ifndef __ORCAC__
static
Bool copy_output_until_stop ( EState* s )
{
@ -399,9 +490,11 @@ Bool copy_output_until_stop ( EState* s )
return progress_out;
}
#endif
/*---------------------------------------------------*/
#ifndef __ORCAC__
static
Bool handle_compress ( bz_stream* strm )
{
@ -446,9 +539,11 @@ Bool handle_compress ( bz_stream* strm )
return progress_in || progress_out;
}
#endif
/*---------------------------------------------------*/
#ifndef __ORCAC__
int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action )
{
Bool progress;
@ -507,9 +602,11 @@ int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action )
}
return BZ_OK; /*--not reached--*/
}
#endif
/*---------------------------------------------------*/
#ifndef __ORCAC__
int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm )
{
EState* s;
@ -527,6 +624,7 @@ int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm )
return BZ_OK;
}
#endif
/*---------------------------------------------------*/
@ -636,12 +734,20 @@ void unRLE_obuf_to_output_FAST ( DState* s )
UInt32* c_tt = s->tt;
UInt32 c_tPos = s->tPos;
char* cs_next_out = s->strm->next_out;
#ifdef __ORCAC__
unsigned long cs_avail_out = s->strm->avail_out;
#else
unsigned int cs_avail_out = s->strm->avail_out;
#endif
/* end restore */
UInt32 avail_out_INIT = cs_avail_out;
Int32 s_save_nblockPP = s->save_nblock+1;
#ifdef __ORCAC__
unsigned long total_out_lo32_old;
#else
unsigned int total_out_lo32_old;
#endif
while (True) {
@ -845,7 +951,7 @@ int BZ_API(BZ2_bzDecompress) ( bz_stream *strm )
if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) {
BZ_FINALISE_CRC ( s->calculatedBlockCRC );
if (s->verbosity >= 3)
VPrintf2 ( " {0x%x, 0x%x}", s->storedBlockCRC,
VPrintf2 ( " {" UInt32_HEXFMT ", " UInt32_HEXFMT "}", s->storedBlockCRC,
s->calculatedBlockCRC );
if (s->verbosity >= 2) VPrintf0 ( "]" );
if (s->calculatedBlockCRC != s->storedBlockCRC)
@ -863,7 +969,7 @@ int BZ_API(BZ2_bzDecompress) ( bz_stream *strm )
Int32 r = BZ2_decompress ( s );
if (r == BZ_STREAM_END) {
if (s->verbosity >= 3)
VPrintf2 ( "\n combined CRCs: stored = 0x%x, computed = 0x%x",
VPrintf2 ( "\n combined CRCs: stored = " UInt32_HEXFMT ", computed = " UInt32_HEXFMT,
s->storedCombinedCRC, s->calculatedCombinedCRC );
if (s->calculatedCombinedCRC != s->storedCombinedCRC)
return BZ_DATA_ERROR;
@ -934,6 +1040,7 @@ static Bool myfeof ( FILE* f )
/*---------------------------------------------------*/
#ifndef __ORCAC__
BZFILE* BZ_API(BZ2_bzWriteOpen)
( int* bzerror,
FILE* f,
@ -978,15 +1085,21 @@ BZFILE* BZ_API(BZ2_bzWriteOpen)
bzf->initialisedOk = True;
return bzf;
}
#endif
/*---------------------------------------------------*/
#ifndef __ORCAC__
void BZ_API(BZ2_bzWrite)
( int* bzerror,
BZFILE* b,
void* buf,
void* buf,
#ifdef __ORCAC__
long len )
#else
int len )
#endif
{
Int32 n, n2, ret;
bzFile* bzf = (bzFile*)b;
@ -1024,29 +1137,45 @@ void BZ_API(BZ2_bzWrite)
{ BZ_SETERR(BZ_OK); return; };
}
}
#endif
/*---------------------------------------------------*/
#ifndef __ORCAC__
void BZ_API(BZ2_bzWriteClose)
( int* bzerror,
BZFILE* b,
int abandon,
#ifdef __ORCAC__
unsigned long* nbytes_in,
unsigned long* nbytes_out )
#else
unsigned int* nbytes_in,
unsigned int* nbytes_out )
#endif
{
BZ2_bzWriteClose64 ( bzerror, b, abandon,
nbytes_in, NULL, nbytes_out, NULL );
}
#endif
#ifndef __ORCAC__
void BZ_API(BZ2_bzWriteClose64)
( int* bzerror,
BZFILE* b,
int abandon,
#ifdef __ORCAC__
unsigned long* nbytes_in_lo32,
unsigned long* nbytes_in_hi32,
unsigned long* nbytes_out_lo32,
unsigned long* nbytes_out_hi32 )
#else
unsigned int* nbytes_in_lo32,
unsigned int* nbytes_in_hi32,
unsigned int* nbytes_out_lo32,
unsigned int* nbytes_out_hi32 )
#endif
{
Int32 n, n2, ret;
bzFile* bzf = (bzFile*)b;
@ -1102,6 +1231,7 @@ void BZ_API(BZ2_bzWriteClose64)
BZ2_bzCompressEnd ( &(bzf->strm) );
free ( bzf );
}
#endif
/*---------------------------------------------------*/
@ -1179,11 +1309,19 @@ void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b )
/*---------------------------------------------------*/
#ifdef __ORCAC__
long BZ_API(BZ2_bzRead)
#else
int BZ_API(BZ2_bzRead)
#endif
( int* bzerror,
BZFILE* b,
void* buf,
void* buf,
#ifdef __ORCAC__
long len )
#else
int len )
#endif
{
Int32 n, ret;
bzFile* bzf = (bzFile*)b;
@ -1265,11 +1403,20 @@ void BZ_API(BZ2_bzReadGetUnused)
/*---------------------------------------------------*/
/*---------------------------------------------------*/
#ifndef __ORCAC__
int BZ_API(BZ2_bzBuffToBuffCompress)
( char* dest,
( char* dest,
#ifdef __ORCAC__
unsigned long* destLen,
#else
unsigned int* destLen,
#endif
char* source,
#ifdef __ORCAC__
unsigned long sourceLen,
#else
unsigned int sourceLen,
#endif
int blockSize100k,
int verbosity,
int workFactor )
@ -1314,14 +1461,23 @@ int BZ_API(BZ2_bzBuffToBuffCompress)
BZ2_bzCompressEnd ( &strm );
return ret;
}
#endif
/*---------------------------------------------------*/
int BZ_API(BZ2_bzBuffToBuffDecompress)
( char* dest,
#ifdef __ORCAC__
unsigned long* destLen,
#else
unsigned int* destLen,
#endif
char* source,
#ifdef __ORCAC__
unsigned long sourceLen,
#else
unsigned int sourceLen,
#endif
int small,
int verbosity )
{
@ -1390,7 +1546,9 @@ const char * BZ_API(BZ2_bzlibVersion)(void)
return BZ_VERSION;
}
/* This stuff is disabled because it may be broken under GNO due to
16-bit ints. It has not been modified to use longs where needed. */
#ifndef __ORCAC__
#ifndef BZ_NO_STDIO
/*---------------------------------------------------*/
@ -1586,6 +1744,7 @@ const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum)
return bzerrorstrings[err*-1];
}
#endif
#endif /* not defined __ORCAC__ */
/*-------------------------------------------------------------*/

87
bzlib.h
View File

@ -85,6 +85,27 @@ extern "C" {
#define BZ_OUTBUFF_FULL (-8)
#define BZ_CONFIG_ERROR (-9)
#ifdef __ORCAC__
/* ORCA/C variant of bz_stream: the byte counters are declared as
   unsigned long rather than unsigned int, and bzalloc takes long
   arguments (the port notes elsewhere in this source cite 16-bit ints
   under GNO as the reason longs are needed). */
typedef
struct {
char *next_in;
unsigned long avail_in;
unsigned long total_in_lo32;
unsigned long total_in_hi32;
char *next_out;
unsigned long avail_out;
unsigned long total_out_lo32;
unsigned long total_out_hi32;
void *state;
void *(*bzalloc)(void *,long,long);
void (*bzfree)(void *,void *);
void *opaque;
}
bz_stream;
#else
typedef
struct {
char *next_in;
@ -104,6 +125,7 @@ typedef
void *opaque;
}
bz_stream;
#endif
#ifndef BZ_IMPORT
@ -195,12 +217,21 @@ BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) (
int* nUnused
);
#ifdef __ORCAC__
BZ_EXTERN long BZ_API(BZ2_bzRead) (
int* bzerror,
BZFILE* b,
void* buf,
long len
);
#else
BZ_EXTERN int BZ_API(BZ2_bzRead) (
int* bzerror,
BZFILE* b,
void* buf,
int len
);
#endif
BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) (
int* bzerror,
@ -210,13 +241,31 @@ BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) (
int workFactor
);
#ifdef __ORCAC__
BZ_EXTERN void BZ_API(BZ2_bzWrite) (
int* bzerror,
BZFILE* b,
void* buf,
long len
);
#else
BZ_EXTERN void BZ_API(BZ2_bzWrite) (
int* bzerror,
BZFILE* b,
void* buf,
int len
);
#endif
#ifdef __ORCAC__
BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
int* bzerror,
BZFILE* b,
int abandon,
unsigned long* nbytes_in,
unsigned long* nbytes_out
);
#else
BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
int* bzerror,
BZFILE* b,
@ -224,7 +273,19 @@ BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
unsigned int* nbytes_in,
unsigned int* nbytes_out
);
#endif
#ifdef __ORCAC__
BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
int* bzerror,
BZFILE* b,
int abandon,
unsigned long* nbytes_in_lo32,
unsigned long* nbytes_in_hi32,
unsigned long* nbytes_out_lo32,
unsigned long* nbytes_out_hi32
);
#else
BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
int* bzerror,
BZFILE* b,
@ -235,10 +296,31 @@ BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
unsigned int* nbytes_out_hi32
);
#endif
#endif
/*-- Utility functions --*/
#ifdef __ORCAC__
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) (
char* dest,
unsigned long* destLen,
char* source,
unsigned long sourceLen,
int blockSize100k,
int verbosity,
int workFactor
);
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) (
char* dest,
unsigned long* destLen,
char* source,
unsigned long sourceLen,
int small,
int verbosity
);
#else
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) (
char* dest,
unsigned int* destLen,
@ -257,6 +339,7 @@ BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) (
int small,
int verbosity
);
#endif
/*--
@ -273,6 +356,9 @@ BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) (
void
);
/* This stuff is disabled because it may be broken under GNO due to
16-bit ints. It has not been modified to use longs where needed. */
#ifndef __ORCAC__
#ifndef BZ_NO_STDIO
BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) (
const char *path,
@ -309,6 +395,7 @@ BZ_EXTERN const char * BZ_API(BZ2_bzerror) (
int *errnum
);
#endif
#endif /* not defined __ORCAC__ */
#ifdef __cplusplus
}

View File

@ -4,6 +4,8 @@
/*--- bzlib_private.h ---*/
/*-------------------------------------------------------------*/
/*-- Modified for use under GNO by Stephen Heumann --*/
/*--
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
@ -76,13 +78,30 @@
/*-- General stuff. --*/
#ifdef __GNO__
#define BZ_VERSION "1.0.2gs1, 07-Jun-2003"
#else
#define BZ_VERSION "1.0.2, 30-Dec-2001"
#endif
typedef char Char;
typedef unsigned char Bool;
typedef unsigned char UChar;
typedef int Int32;
typedef unsigned int UInt32;
#ifdef __ORCAC__
typedef long Int32;
typedef unsigned long UInt32;
# define Int32_FMT "%ld"
# define UInt32_HEX8FMT "0x%8lx"
# define UInt32_HEXFMT "0x%lx"
# define Int32_6FMT "%6ld"
#else
typedef int Int32;
typedef unsigned int UInt32;
# define Int32_FMT "%d"
# define UInt32_HEX8FMT "0x%8x"
# define UInt32_HEXFMT "0x%x"
# define Int32_6FMT "%6d"
#endif /* defined __ORCAC__ */
typedef short Int16;
typedef unsigned short UInt16;
@ -162,7 +181,11 @@ extern void bz_internal_error ( int errcode );
/*-- Stuff for randomising repetitive blocks. --*/
#ifdef __ORCAC__
extern Int16 BZ2_rNums[512];
#else
extern Int32 BZ2_rNums[512];
#endif
#define BZ_RAND_DECLS \
Int32 rNToGo; \

61
bzmore
View File

@ -1,61 +0,0 @@
#!/bin/sh
# Bzmore wrapped for bzip2,
# adapted from zmore by Philippe Troin <phil@fifi.org> for Debian GNU/Linux.
PATH="/usr/bin:$PATH"; export PATH
prog=`echo $0 | sed 's|.*/||'`
case "$prog" in
*less) more=less ;;
*) more=more ;;
esac
if test "`echo -n a`" = "-n a"; then
# looks like a SysV system:
n1=''; n2='\c'
else
n1='-n'; n2=''
fi
oldtty=`stty -g 2>/dev/null`
if stty -cbreak 2>/dev/null; then
cb='cbreak'; ncb='-cbreak'
else
# 'stty min 1' resets eof to ^a on both SunOS and SysV!
cb='min 1 -icanon'; ncb='icanon eof ^d'
fi
if test $? -eq 0 -a -n "$oldtty"; then
trap 'stty $oldtty 2>/dev/null; exit' 0 2 3 5 10 13 15
else
trap 'stty $ncb echo 2>/dev/null; exit' 0 2 3 5 10 13 15
fi
if test $# = 0; then
if test -t 0; then
echo usage: $prog files...
else
bzip2 -cdfq | eval $more
fi
else
FIRST=1
for FILE
do
if test $FIRST -eq 0; then
echo $n1 "--More--(Next file: $FILE)$n2"
stty $cb -echo 2>/dev/null
ANS=`dd bs=1 count=1 2>/dev/null`
stty $ncb echo 2>/dev/null
echo " "
if test "$ANS" = 'e' -o "$ANS" = 'q'; then
exit
fi
fi
if test "$ANS" != 's'; then
echo "------> $FILE <------"
bzip2 -cdfq "$FILE" | eval $more
fi
# Enable the "--More--(Next file: ...)" prompt for subsequent files only
# when standard output is a terminal.  The descriptor is named explicitly
# because a bare `test -t' is a non-empty-string test under POSIX and is
# therefore always true.
if test -t 1; then
  FIRST=0
fi
done
fi

152
bzmore.1
View File

@ -1,152 +0,0 @@
.\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org>
.\"for Debian GNU/Linux
.TH BZMORE 1
.SH NAME
bzmore, bzless \- file perusal filter for crt viewing of bzip2 compressed text
.SH SYNOPSIS
.B bzmore
[ name ... ]
.br
.B bzless
[ name ... ]
.SH NOTE
In the following description,
.I bzless
and
.I less
can be used interchangeably with
.I bzmore
and
.I more.
.SH DESCRIPTION
.I Bzmore
is a filter which allows examination of compressed or plain text files
one screenful at a time on a soft-copy terminal.
.I bzmore
works on files compressed with
.I bzip2
and also on uncompressed files.
If a file does not exist,
.I bzmore
looks for a file of the same name with the addition of a .bz2 suffix.
.PP
.I Bzmore
normally pauses after each screenful, printing --More--
at the bottom of the screen.
If the user then types a carriage return, one more line is displayed.
If the user hits a space,
another screenful is displayed. Other possibilities are enumerated later.
.PP
.I Bzmore
looks in the file
.I /etc/termcap
to determine terminal characteristics,
and to determine the default window size.
On a terminal capable of displaying 24 lines,
the default window size is 22 lines.
Other sequences which may be typed when
.I bzmore
pauses, and their effects, are as follows (\fIi\fP is an optional integer
argument, defaulting to 1) :
.PP
.IP \fIi\|\fP<space>
display
.I i
more lines, (or another screenful if no argument is given)
.PP
.IP ^D
display 11 more lines (a ``scroll'').
If
.I i
is given, then the scroll size is set to \fIi\|\fP.
.PP
.IP d
same as ^D (control-D)
.PP
.IP \fIi\|\fPz
same as typing a space except that \fIi\|\fP, if present, becomes the new
window size. Note that the window size reverts back to the default at the
end of the current file.
.PP
.IP \fIi\|\fPs
skip \fIi\|\fP lines and print a screenful of lines
.PP
.IP \fIi\|\fPf
skip \fIi\fP screenfuls and print a screenful of lines
.PP
.IP "q or Q"
quit reading the current file; go on to the next (if any)
.PP
.IP "e or q"
When the prompt --More--(Next file:
.IR file )
is printed, this command causes bzmore to exit.
.PP
.IP s
When the prompt --More--(Next file:
.IR file )
is printed, this command causes bzmore to skip the next file and continue.
.PP
.IP =
Display the current line number.
.PP
.IP \fIi\|\fP/expr
search for the \fIi\|\fP-th occurrence of the regular expression \fIexpr.\fP
If the pattern is not found,
.I bzmore
goes on to the next file (if any).
Otherwise, a screenful is displayed, starting two lines before the place
where the expression was found.
The user's erase and kill characters may be used to edit the regular
expression.
Erasing back past the first column cancels the search command.
.PP
.IP \fIi\|\fPn
search for the \fIi\|\fP-th occurrence of the last regular expression entered.
.PP
.IP !command
invoke a shell with \fIcommand\|\fP.
Any `!' characters in "command" are replaced with the
previous shell command. The sequence "\\!" is replaced by "!".
.PP
.IP ":q or :Q"
quit reading the current file; go on to the next (if any)
(same as q or Q).
.PP
.IP .
(dot) repeat the previous command.
.PP
The commands take effect immediately, i.e., it is not necessary to
type a carriage return.
Up to the time when the command character itself is given,
the user may hit the line kill character to cancel the numerical
argument being formed.
In addition, the user may hit the erase character to redisplay the
--More-- message.
.PP
At any time when output is being sent to the terminal, the user can
hit the quit key (normally control\-\\).
.I Bzmore
will stop sending output, and will display the usual --More--
prompt.
The user may then enter one of the above commands in the normal manner.
Unfortunately, some output is lost when this is done, due to the
fact that any characters waiting in the terminal's output queue
are flushed when the quit signal occurs.
.PP
The terminal is set to
.I noecho
mode by this program so that the output can be continuous.
What you type will thus not show on your terminal, except for the / and !
commands.
.PP
If the standard output is not a teletype, then
.I bzmore
acts just like
.I bzcat,
except that a header is printed before each file.
.SH FILES
.DT
/etc/termcap Terminal data base
.SH "SEE ALSO"
more(1), less(1), bzip2(1), bzdiff(1), bzgrep(1)

View File

@ -1,714 +0,0 @@
/*-------------------------------------------------------------*/
/*--- Compression machinery (not incl block sorting) ---*/
/*--- compress.c ---*/
/*-------------------------------------------------------------*/
/*--
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
Copyright (C) 1996-2002 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. The origin of this software must not be misrepresented; you must
not claim that you wrote the original software. If you use this
software in a product, an acknowledgment in the product
documentation would be appreciated but is not required.
3. Altered source versions must be plainly marked as such, and must
not be misrepresented as being the original software.
4. The name of the author may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Julian Seward, Cambridge, UK.
jseward@acm.org
bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
David Wheeler
Peter Fenwick
Alistair Moffat
Radford Neal
Ian H. Witten
Robert Sedgewick
Jon L. Bentley
For more information on these sources, see the manual.
--*/
/*--
CHANGES
~~~~~~~
0.9.0 -- original version.
0.9.0a/b -- no changes in this file.
0.9.0c
* changed setting of nGroups in sendMTFValues() so as to
do a bit better on small files
--*/
#include "bzlib_private.h"
/*---------------------------------------------------*/
/*--- Bit stream I/O ---*/
/*---------------------------------------------------*/
/*---------------------------------------------------*/
/*--
   Reset the output bit buffer of s: clear the accumulator and mark
   it as holding no pending bits.
--*/
void BZ2_bsInitWrite ( EState* s )
{
   s->bsBuff = 0;
   s->bsLive = 0;
}
/*---------------------------------------------------*/
/*--
   Flush the bits still held in the bit buffer of s into s->zbits.
   Bytes are taken from the top of bsBuff, one per iteration, until
   bsLive is no longer positive; a trailing partial byte is therefore
   written out as a whole byte.
--*/
static
void bsFinishWrite ( EState* s )
{
   for ( ; s->bsLive > 0; s->bsLive -= 8) {
      s->zbits[s->numZ++] = (UChar)(s->bsBuff >> 24);
      s->bsBuff <<= 8;
   }
}
/*---------------------------------------------------*/
/*--
   bsNEEDW: move every complete byte currently held in the bit
   buffer out to s->zbits, advancing s->numZ, so that fewer than
   8 bits remain pending afterwards.
   Expects a variable named `s' to be in scope at the point of use.
   The macro argument (nz) is not referenced by the expansion.
--*/
#define bsNEEDW(nz) \
{ \
while (s->bsLive >= 8) { \
s->zbits[s->numZ] \
= (UChar)(s->bsBuff >> 24); \
s->numZ++; \
s->bsBuff <<= 8; \
s->bsLive -= 8; \
} \
}
/*---------------------------------------------------*/
/*--
   Append v as an n-bit field to the output bit stream of s.
   Whole pending bytes are drained first (bsNEEDW), then v is OR-ed
   into bsBuff directly below the bits already pending.  v is not
   masked here, so callers must pass a value that fits in n bits.
--*/
static
__inline__
void bsW ( EState* s, Int32 n, UInt32 v )
{
   Int32 shift;

   bsNEEDW ( n );
   shift = 32 - s->bsLive - n;
   s->bsBuff |= (v << shift);
   s->bsLive += n;
}
/*---------------------------------------------------*/
/*--
   Write the 32-bit value u to the bit stream as four 8-bit fields,
   most significant byte first.
--*/
static
void bsPutUInt32 ( EState* s, UInt32 u )
{
   Int32 shift;

   for (shift = 24; shift >= 0; shift -= 8)
      bsW ( s, 8, (u >> shift) & 0xffL );
}
/*---------------------------------------------------*/
/*-- Write the single byte c to the bit stream as one 8-bit field. --*/
static
void bsPutUChar ( EState* s, UChar c )
{
   UInt32 widened = (UInt32)c;

   bsW ( s, 8, widened );
}
/*---------------------------------------------------*/
/*--- The back end proper ---*/
/*---------------------------------------------------*/
/*---------------------------------------------------*/
/*--
   Build the byte-value -> dense-index map for the current block.
   Every byte value flagged in s->inUse[] receives the next
   consecutive index in s->unseqToSeq[], in increasing byte order;
   s->nInUse ends up holding the number of flagged values.
--*/
static
void makeMaps_e ( EState* s )
{
   Int32 sym;
   Int32 nUsed = 0;

   for (sym = 0; sym < 256; sym++) {
      if (!s->inUse[sym]) continue;
      s->unseqToSeq[sym] = nUsed;
      nUsed++;
   }
   s->nInUse = nUsed;
}
/*---------------------------------------------------*/
/*--
   Convert the sorted block held in s into the symbol stream that is
   later Huffman coded.  Each byte is mapped to its dense index
   (unseqToSeq), pushed through a move-to-front list, and runs of
   the front symbol are replaced by sequences of BZ_RUNA / BZ_RUNB.
   Output symbols go to s->mtfv[0 .. s->nMTF-1], terminated by an
   end-of-block symbol; s->mtfFreq[] receives the count of every
   symbol written.
--*/
static
void generateMTFValues ( EState* s )
{
/* yy is the move-to-front list; yy[0] is the most recent symbol. */
UChar yy[256];
Int32 i, j;
/* zPend counts occurrences of the front symbol not yet written out. */
Int32 zPend;
/* wr is the next free slot in mtfv. */
Int32 wr;
Int32 EOB;
/*
After sorting (eg, here),
s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
and
((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
holds the original block data.
The first thing to do is generate the MTF values,
and put them in
((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
Because there are strictly fewer or equal MTF values
than block values, ptr values in this area are overwritten
with MTF values only when they are no longer needed.
The final compressed bitstream is generated into the
area starting at
(UChar*) (&((UChar*)s->arr2)[s->nblock])
These storage aliases are set up in bzCompressInit(),
except for the last one, which is arranged in
compressBlock().
*/
UInt32* ptr = s->ptr;
UChar* block = s->block;
UInt16* mtfv = s->mtfv;
makeMaps_e ( s );
/* The end-of-block symbol is one past the largest MTF symbol (nInUse). */
EOB = s->nInUse+1;
for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
wr = 0;
zPend = 0;
/* Start with the identity ordering of the dense symbol indices. */
for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
for (i = 0; i < s->nblock; i++) {
UChar ll_i;
AssertD ( wr <= i, "generateMTFValues(1)" );
/* Take the byte just before position ptr[i], wrapping at the block start. */
j = ptr[i]-1; if (j < 0) j += s->nblock;
ll_i = s->unseqToSeq[block[j]];
AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
if (yy[0] == ll_i) {
/* Same symbol as the list front: just extend the pending run. */
zPend++;
} else {
if (zPend > 0) {
/* Write the pending run length as a sequence of RUNA/RUNB symbols,
   one per loop iteration, lowest-order digit first. */
zPend--;
while (True) {
if (zPend & 1) {
mtfv[wr] = BZ_RUNB; wr++;
s->mtfFreq[BZ_RUNB]++;
} else {
mtfv[wr] = BZ_RUNA; wr++;
s->mtfFreq[BZ_RUNA]++;
}
if (zPend < 2) break;
zPend = (zPend - 2) / 2;
};
zPend = 0;
}
{
/* Move ll_i to the front of yy, shifting the entries that preceded
   it down by one slot; the loop stops at the slot where ll_i was. */
register UChar rtmp;
register UChar* ryy_j;
register UChar rll_i;
rtmp = yy[1];
yy[1] = yy[0];
ryy_j = &(yy[1]);
rll_i = ll_i;
while ( rll_i != rtmp ) {
register UChar rtmp2;
ryy_j++;
rtmp2 = rtmp;
rtmp = *ryy_j;
*ryy_j = rtmp2;
};
yy[0] = rtmp;
/* j is the list position ll_i was found at; it is emitted as j+1.
   (Presumably the lowest codes are reserved for BZ_RUNA/BZ_RUNB,
   whose values are defined outside this file section - confirm.) */
j = ryy_j - &(yy[0]);
mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
}
}
}
/* Flush a run that is still pending when the block ends. */
if (zPend > 0) {
zPend--;
while (True) {
if (zPend & 1) {
mtfv[wr] = BZ_RUNB; wr++;
s->mtfFreq[BZ_RUNB]++;
} else {
mtfv[wr] = BZ_RUNA; wr++;
s->mtfFreq[BZ_RUNA]++;
}
if (zPend < 2) break;
zPend = (zPend - 2) / 2;
};
zPend = 0;
}
/* Terminate the symbol stream and record its length. */
mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
s->nMTF = wr;
}
/*---------------------------------------------------*/
/*--
   Pseudo code lengths used to seed the coding tables: symbols that
   fall inside a table's initial range get the "lesser" cost, all
   others the "greater" cost.
--*/
#define BZ_LESSER_ICOST  0
#define BZ_GREATER_ICOST 15

/*--
   Huffman-code the symbol stream s->mtfv[0 .. s->nMTF-1] and write
   it to the output bit stream of s.  The steps are:
     1. choose the number of coding tables (2..6) from nMTF;
     2. seed the tables by splitting the alphabet into ranges of
        roughly equal total frequency;
     3. for BZ_N_ITERS rounds, assign each group of BZ_G_SIZE symbols
        to its cheapest table and rebuild the tables from the
        frequencies gathered that way;
     4. MTF-code the per-group table selectors and assign the
        actual codes;
     5. emit, in order: the symbol-usage map, the selectors, the
        delta-coded code lengths, and finally the coded symbols.
   With s->verbosity >= 3 progress information is printed through
   the VPrintf macros.
--*/
static
void sendMTFValues ( EState* s )
{
   Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
   Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
   Int32 nGroups, nBytes;

   /*--
   UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
   is a global since the decoder also needs it.
   Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
   Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
   are also globals only used in this proc.
   Made global to keep stack frame size small.
   --*/

   UInt16 cost[BZ_N_GROUPS];
   Int32  fave[BZ_N_GROUPS];

   UInt16* mtfv = s->mtfv;

   if (s->verbosity >= 3)
      VPrintf3( " %d in block, %d after MTF & 1-2 coding, "
                "%d+2 syms in use\n",
                s->nblock, s->nMTF, s->nInUse );

   /* Alphabet: the MTF symbols written by generateMTFValues plus EOB. */
   alphaSize = s->nInUse+2;
   for (t = 0; t < BZ_N_GROUPS; t++)
      for (v = 0; v < alphaSize; v++)
         s->len[t][v] = BZ_GREATER_ICOST;

   /*--- Decide how many coding tables to use ---*/
   AssertH ( s->nMTF > 0, 3001 );
   if (s->nMTF < 200)  nGroups = 2; else
   if (s->nMTF < 600)  nGroups = 3; else
   if (s->nMTF < 1200) nGroups = 4; else
   if (s->nMTF < 2400) nGroups = 5; else
                       nGroups = 6;

   /*--- Generate an initial set of coding tables ---*/
   {
      Int32 nPart, remF, tFreq, aFreq;

      nPart = nGroups;
      remF  = s->nMTF;
      gs = 0;
      while (nPart > 0) {
         /* Aim for an equal share of the frequency still unassigned. */
         tFreq = remF / nPart;
         ge = gs-1;
         aFreq = 0;
         while (aFreq < tFreq && ge < alphaSize-1) {
            ge++;
            aFreq += s->mtfFreq[ge];
         }

         /* For every other interior table, give the last symbol back
            so that the ranges do not all overshoot their target. */
         if (ge > gs
             && nPart != nGroups && nPart != 1
             && ((nGroups-nPart) % 2 == 1)) {
            aFreq -= s->mtfFreq[ge];
            ge--;
         }

         if (s->verbosity >= 3)
            VPrintf5( " initial group %d, [%d .. %d], "
                      "has %d syms (%4.1f%%)\n",
                      nPart, gs, ge, aFreq,
                      (100.0 * (float)aFreq) / (float)(s->nMTF) );

         for (v = 0; v < alphaSize; v++)
            if (v >= gs && v <= ge)
               s->len[nPart-1][v] = BZ_LESSER_ICOST; else
               s->len[nPart-1][v] = BZ_GREATER_ICOST;

         nPart--;
         gs = ge+1;
         remF -= aFreq;
      }
   }

   /*---
      Iterate up to BZ_N_ITERS times to improve the tables.
   ---*/
   for (iter = 0; iter < BZ_N_ITERS; iter++) {

      for (t = 0; t < nGroups; t++) fave[t] = 0;

      for (t = 0; t < nGroups; t++)
         for (v = 0; v < alphaSize; v++)
            s->rfreq[t][v] = 0;

      /*---
        Set up an auxiliary length table which is used to fast-track
        the common case (nGroups == 6).
        Each 32-bit entry packs the lengths of two tables, so three
        additions per symbol accumulate all six costs.
      ---*/
      if (nGroups == 6) {
         for (v = 0; v < alphaSize; v++) {
            s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
            s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
            s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
         }
      }

      nSelectors = 0;
      totc = 0;
      gs = 0;
      while (True) {

         /*--- Set group start & end marks. --*/
         if (gs >= s->nMTF) break;
         ge = gs + BZ_G_SIZE - 1;
         if (ge >= s->nMTF) ge = s->nMTF-1;

         /*--
            Calculate the cost of this group as coded
            by each of the coding tables.
         --*/
         for (t = 0; t < nGroups; t++) cost[t] = 0;

         if (nGroups == 6 && 50 == ge-gs+1) {
            /*--- fast track the common case ---*/
            register UInt32 cost01, cost23, cost45;
            register UInt16 icv;
            cost01 = cost23 = cost45 = 0;

            /* The last line of this macro must not end in a
               backslash: that would make the next source line part
               of the definition. */
#           define BZ_ITER(nn)                \
               icv = mtfv[gs+(nn)];           \
               cost01 += s->len_pack[icv][0]; \
               cost23 += s->len_pack[icv][1]; \
               cost45 += s->len_pack[icv][2]

            BZ_ITER(0);  BZ_ITER(1);  BZ_ITER(2);  BZ_ITER(3);  BZ_ITER(4);
            BZ_ITER(5);  BZ_ITER(6);  BZ_ITER(7);  BZ_ITER(8);  BZ_ITER(9);
            BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
            BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
            BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
            BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
            BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
            BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
            BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
            BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);

#           undef BZ_ITER

            /* Unpack the three accumulators into the six costs. */
            cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
            cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
            cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;

         } else {
            /*--- slow version which correctly handles all situations ---*/
            for (i = gs; i <= ge; i++) {
               UInt16 icv = mtfv[i];
               for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
            }
         }

         /*--
            Find the coding table which is best for this group,
            and record its identity in the selector table.
         --*/
         bc = 999999999; bt = -1;
         for (t = 0; t < nGroups; t++)
            if (cost[t] < bc) { bc = cost[t]; bt = t; };
         totc += bc;
         fave[bt]++;
         s->selector[nSelectors] = bt;
         nSelectors++;

         /*--
            Increment the symbol frequencies for the selected table.
         --*/
         if (nGroups == 6 && 50 == ge-gs+1) {
            /*--- fast track the common case ---*/

#           define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++

            BZ_ITUR(0);  BZ_ITUR(1);  BZ_ITUR(2);  BZ_ITUR(3);  BZ_ITUR(4);
            BZ_ITUR(5);  BZ_ITUR(6);  BZ_ITUR(7);  BZ_ITUR(8);  BZ_ITUR(9);
            BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
            BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
            BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
            BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
            BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
            BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
            BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
            BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);

#           undef BZ_ITUR

         } else {
            /*--- slow version which correctly handles all situations ---*/
            for (i = gs; i <= ge; i++)
               s->rfreq[bt][ mtfv[i] ]++;
         }

         gs = ge+1;
      }
      if (s->verbosity >= 3) {
         VPrintf2 ( " pass %d: size is %d, grp uses are ",
                    iter+1, totc/8 );
         for (t = 0; t < nGroups; t++)
            VPrintf1 ( "%d ", fave[t] );
         VPrintf0 ( "\n" );
      }

      /*--
        Recompute the tables based on the accumulated frequencies.
      --*/
      for (t = 0; t < nGroups; t++)
         BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
                                 alphaSize, 20 );
   }

   /* nGroups is sent in 3 bits and nSelectors in 15 bits below. */
   AssertH( nGroups < 8, 3002 );
   AssertH( nSelectors < 32768 &&
            nSelectors <= (2 + (900000 / BZ_G_SIZE)),
            3003 );

   /*--- Compute MTF values for the selectors. ---*/
   {
      UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
      for (i = 0; i < nGroups; i++) pos[i] = i;
      for (i = 0; i < nSelectors; i++) {
         ll_i = s->selector[i];
         j = 0;
         tmp = pos[j];
         while ( ll_i != tmp ) {
            j++;
            tmp2 = tmp;
            tmp = pos[j];
            pos[j] = tmp2;
         };
         pos[0] = tmp;
         s->selectorMtf[i] = j;
      }
   };

   /*--- Assign actual codes for the tables. --*/
   for (t = 0; t < nGroups; t++) {
      minLen = 32;
      maxLen = 0;
      for (i = 0; i < alphaSize; i++) {
         if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
         if (s->len[t][i] < minLen) minLen = s->len[t][i];
      }
      AssertH ( !(maxLen > 20), 3004 );
      AssertH ( !(minLen < 1),  3005 );
      BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
                          minLen, maxLen, alphaSize );
   }

   /*--- Transmit the mapping table. ---*/
   {
      /* Two-level bitmap: 16 bits flag which ranges of 16 byte
         values are used at all, then 16 bits for each used range. */
      Bool inUse16[16];
      for (i = 0; i < 16; i++) {
          inUse16[i] = False;
          for (j = 0; j < 16; j++)
             if (s->inUse[i * 16 + j]) inUse16[i] = True;
      }

      nBytes = s->numZ;
      for (i = 0; i < 16; i++)
         if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);

      for (i = 0; i < 16; i++)
         if (inUse16[i])
            for (j = 0; j < 16; j++) {
               if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
            }

      if (s->verbosity >= 3)
         VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes );
   }

   /*--- Now the selectors. ---*/
   nBytes = s->numZ;
   bsW ( s, 3, nGroups );
   bsW ( s, 15, nSelectors );
   for (i = 0; i < nSelectors; i++) {
      /* Unary code: selectorMtf[i] one-bits followed by a zero-bit. */
      for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
      bsW(s,1,0);
   }
   if (s->verbosity >= 3)
      VPrintf1( "selectors %d, ", s->numZ-nBytes );

   /*--- Now the coding tables. ---*/
   nBytes = s->numZ;

   for (t = 0; t < nGroups; t++) {
      /* 5-bit starting length, then per symbol a series of +1 / -1
         steps closed by a single zero-bit. */
      Int32 curr = s->len[t][0];
      bsW ( s, 5, curr );
      for (i = 0; i < alphaSize; i++) {
         while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
         while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
         bsW ( s, 1, 0 );
      }
   }

   if (s->verbosity >= 3)
      VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );

   /*--- And finally, the block data proper ---*/
   nBytes = s->numZ;
   selCtr = 0;
   gs = 0;
   while (True) {
      if (gs >= s->nMTF) break;
      ge = gs + BZ_G_SIZE - 1;
      if (ge >= s->nMTF) ge = s->nMTF-1;
      AssertH ( s->selector[selCtr] < nGroups, 3006 );

      if (nGroups == 6 && 50 == ge-gs+1) {
            /*--- fast track the common case ---*/
            UInt16 mtfv_i;
            UChar* s_len_sel_selCtr
               = &(s->len[s->selector[selCtr]][0]);
            Int32* s_code_sel_selCtr
               = &(s->code[s->selector[selCtr]][0]);

#           define BZ_ITAH(nn)                      \
               mtfv_i = mtfv[gs+(nn)];              \
               bsW ( s,                             \
                     s_len_sel_selCtr[mtfv_i],      \
                     s_code_sel_selCtr[mtfv_i] )

            BZ_ITAH(0);  BZ_ITAH(1);  BZ_ITAH(2);  BZ_ITAH(3);  BZ_ITAH(4);
            BZ_ITAH(5);  BZ_ITAH(6);  BZ_ITAH(7);  BZ_ITAH(8);  BZ_ITAH(9);
            BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
            BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
            BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
            BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
            BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
            BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
            BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
            BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);

#           undef BZ_ITAH

      } else {
         /*--- slow version which correctly handles all situations ---*/
         for (i = gs; i <= ge; i++) {
            bsW ( s,
                  s->len  [s->selector[selCtr]] [mtfv[i]],
                  s->code [s->selector[selCtr]] [mtfv[i]] );
         }
      }

      gs = ge+1;
      selCtr++;
   }
   /* Every selector chosen during optimisation must have been used. */
   AssertH( selCtr == nSelectors, 3007 );

   if (s->verbosity >= 3)
      VPrintf1( "codes %d\n", s->numZ-nBytes );
}
/*---------------------------------------------------*/
/*--
   Compress the block currently buffered in s and append it to the
   output bit stream.
   - A non-empty block has its CRC finalised and folded into the
     combined CRC, is block-sorted, and is then written as: block
     magic, block CRC, a zero "randomised" bit, the 24-bit origPtr
     and the coded MTF values.
   - For block number 1 the stream header ("BZh" + block size digit)
     is written first.
   - When is_last_block is set, the stream trailer (end magic and
     combined CRC) is written and the bit buffer is flushed.
--*/
void BZ2_compressBlock ( EState* s, Bool is_last_block )
{
   static const UChar blockHdr[6]  = { 0x31, 0x41, 0x59, 0x26, 0x53, 0x59 };
   static const UChar streamEnd[6] = { 0x17, 0x72, 0x45, 0x38, 0x50, 0x90 };
   Int32 k;

   if (s->nblock > 0) {

      BZ_FINALISE_CRC ( s->blockCRC );
      /* combined CRC = rotate-left-by-1 of the old value, xor block CRC */
      s->combinedCRC = ((s->combinedCRC << 1) | (s->combinedCRC >> 31))
                       ^ s->blockCRC;
      if (s->blockNo > 1) s->numZ = 0;

      if (s->verbosity >= 2)
         VPrintf4( " block %d: crc = 0x%8x, "
                   "combined CRC = 0x%8x, size = %d\n",
                   s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );

      BZ2_blockSort ( s );
   }

   /* Compressed output is placed directly after the block data in arr2. */
   s->zbits = ((UChar*)s->arr2) + s->nblock;

   /*-- If this is the first block, create the stream header. --*/
   if (s->blockNo == 1) {
      BZ2_bsInitWrite ( s );
      bsPutUChar ( s, BZ_HDR_B );
      bsPutUChar ( s, BZ_HDR_Z );
      bsPutUChar ( s, BZ_HDR_h );
      bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
   }

   if (s->nblock > 0) {

      for (k = 0; k < 6; k++)
         bsPutUChar ( s, blockHdr[k] );

      /*-- Now the block's CRC, so it is in a known place. --*/
      bsPutUInt32 ( s, s->blockCRC );

      /*--
         Now a single bit indicating (non-)randomisation.
         As of version 0.9.5, we use a better sorting algorithm
         which makes randomisation unnecessary.  So always set
         the randomised bit to 'no'.  Of course, the decoder
         still needs to be able to handle randomised blocks
         so as to maintain backwards compatibility with
         older versions of bzip2.
      --*/
      bsW(s,1,0);

      bsW ( s, 24, s->origPtr );
      generateMTFValues ( s );
      sendMTFValues ( s );
   }

   /*-- If this is the last block, add the stream trailer. --*/
   if (is_last_block) {

      for (k = 0; k < 6; k++)
         bsPutUChar ( s, streamEnd[k] );

      bsPutUInt32 ( s, s->combinedCRC );
      if (s->verbosity >= 2)
         VPrintf1( " final combined CRC = 0x%x\n ", s->combinedCRC );
      bsFinishWrite ( s );
   }
}
/*-------------------------------------------------------------*/
/*--- end compress.c ---*/
/*-------------------------------------------------------------*/

View File

@ -4,6 +4,11 @@
/*--- decompress.c ---*/
/*-------------------------------------------------------------*/
/*-- Modified for use under GNO by Stephen Heumann --*/
#ifdef __ORCAC__
segment "decompress", dynamic;
#endif
/*--
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
@ -80,6 +85,36 @@ void makeMaps_d ( DState* s )
#define RETURN(rrr) \
{ retVal = rrr; goto save_state_and_return; };
#ifdef __ORCAC__
/*--
   Out-of-line helper for the ORCA/C variant of GET_BITS: pull one
   more input byte into the low end of s->bsBuff, then advance the
   input pointer, decrement avail_in, and bump the 64-bit total_in
   counter (carrying into the high word when the low word wraps).
   The caller must have checked that avail_in is non-zero.
--*/
void getBitsOrcaHack(DState *s) {
   s->bsBuff = (s->bsBuff << 8)
               | ((UInt32)(*((UChar*)(s->strm->next_in))));
   s->bsLive += 8;
   s->strm->next_in++;
   s->strm->avail_in--;
   if (++s->strm->total_in_lo32 == 0)
      s->strm->total_in_hi32++;
}
/*--
   GET_BITS(lll,vvv,nnn), ORCA/C variant: resumable read of an
   nnn-bit field into vvv.  Expands to a `case lll:' label, so it
   is only usable inside the decoder's state switch.  The state is
   recorded first; if the input runs dry the macro leaves through
   RETURN(BZ_OK) and the read is retried from the same label on the
   next call.  Refilling the bit buffer is delegated to
   getBitsOrcaHack().
   The mask is built from a UInt32 one rather than a plain int one,
   so that the shift cannot overflow when int is narrower than
   nnn+1 bits.  NOTE(review): ORCA/C is understood to use a 16-bit
   int, which would make nnn == 15 overflow otherwise - confirm.
--*/
#define GET_BITS(lll,vvv,nnn)                     \
   case lll: s->state = lll;                      \
   while (True) {                                 \
      if (s->bsLive >= nnn) {                     \
         UInt32 v;                                \
         v = (s->bsBuff >>                        \
             (s->bsLive-nnn))                     \
             & (((UInt32)1 << (nnn))-1);          \
         s->bsLive -= nnn;                        \
         vvv = v;                                 \
         break;                                   \
      }                                           \
      if (s->strm->avail_in == 0) RETURN(BZ_OK);  \
      getBitsOrcaHack(s);                         \
   }
#else
#define GET_BITS(lll,vvv,nnn) \
case lll: s->state = lll; \
while (True) { \
@ -103,6 +138,7 @@ void makeMaps_d ( DState* s )
if (s->strm->total_in_lo32 == 0) \
s->strm->total_in_hi32++; \
}
#endif
#define GET_UCHAR(lll,uuu) \
GET_BITS(lll,uuu,8)
@ -141,7 +177,6 @@ void makeMaps_d ( DState* s )
lval = gPerm[zvec - gBase[zn]]; \
}
/*---------------------------------------------------*/
Int32 BZ2_decompress ( DState* s )
{
@ -276,7 +311,7 @@ Int32 BZ2_decompress ( DState* s )
s->currBlockNo++;
if (s->verbosity >= 2)
VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo );
VPrintf1 ( "\n [" Int32_FMT ": huff+mtf ", s->currBlockNo );
s->storedBlockCRC = 0;
GET_UCHAR(BZ_X_BCRC_1, uc);
@ -341,8 +376,13 @@ Int32 BZ2_decompress ( DState* s )
/*--- Undo the MTF values for the selectors. ---*/
{
#ifdef __ORCAC__
UChar pos[BZ_N_GROUPS] = { 0, 1, 2, 3, 4, 5 };
UChar tmp, v;
#else
UChar pos[BZ_N_GROUPS], tmp, v;
for (v = 0; v < nGroups; v++) pos[v] = v;
#endif
for (i = 0; i < nSelectors; i++) {
v = s->selectorMtf[i];
@ -435,14 +475,22 @@ Int32 BZ2_decompress ( DState* s )
if (s->smallDecompress)
while (es > 0) {
if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
#ifdef __ORCAC__
*((UInt16 *)(s->ll16)+nblock) = (UInt16)uc;
#else
s->ll16[nblock] = (UInt16)uc;
#endif
nblock++;
es--;
}
else
while (es > 0) {
if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
#ifdef __ORCAC__
*((UInt32 *)(s->tt)+nblock) = (UInt32)uc;
#else
s->tt[nblock] = (UInt32)uc;
#endif
nblock++;
es--;
};
@ -509,8 +557,13 @@ Int32 BZ2_decompress ( DState* s )
s->unzftab[s->seqToUnseq[uc]]++;
if (s->smallDecompress)
#ifdef __ORCAC__
*((UInt16 *)(s->ll16)+nblock) = (UInt16)(s->seqToUnseq[uc]); else
*((UInt32 *)(s->tt)+nblock) = (UInt32)(s->seqToUnseq[uc]);
#else
s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]);
#endif
nblock++;
GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
@ -542,7 +595,11 @@ Int32 BZ2_decompress ( DState* s )
/*-- compute the T vector --*/
for (i = 0; i < nblock; i++) {
#ifdef __ORCAC__
uc = (UChar) *((UInt16 *)(s->ll16)+i);
#else
uc = (UChar)(s->ll16[i]);
#endif
SET_LL(i, s->cftabCopy[uc]);
s->cftabCopy[uc]++;
}
@ -572,12 +629,21 @@ Int32 BZ2_decompress ( DState* s )
/*-- compute the T^(-1) vector --*/
for (i = 0; i < nblock; i++) {
#ifdef __ORCAC__
uc = (UChar)((*((UInt32 *)(s->tt)+i)) & 0xff);
*((UInt32 *)(s->tt)+(s->cftab[uc])) |= (i << 8);
#else
uc = (UChar)(s->tt[i] & 0xff);
s->tt[s->cftab[uc]] |= (i << 8);
#endif
s->cftab[uc]++;
}
#ifdef __ORCAC__
s->tPos = (*((UInt32 *)(s->tt)+(s->origPtr))) >> 8;
#else
s->tPos = s->tt[s->origPtr] >> 8;
#endif
s->nblock_used = 0;
if (s->blockRandomised) {
BZ_RAND_INIT_MASK;

176
dlltest.c
View File

@ -1,176 +0,0 @@
/*
minibz2
libbz2.dll test program.
by Yoshioka Tsuneo(QWF00133@nifty.ne.jp/tsuneo-y@is.aist-nara.ac.jp)
This file is Public Domain.
welcome any email to me.
usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]
*/
#define BZ_IMPORT
#include <stdio.h>
#include <stdlib.h>
#include "bzlib.h"
#ifdef _WIN32
#include <io.h>
#endif
#ifdef _WIN32
#define BZ2_LIBNAME "libbz2-1.0.2.DLL"
#include <windows.h>
static int BZ2DLLLoaded = 0;
static HINSTANCE BZ2DLLhLib;
/*
   Load the bzip2 DLL (BZ2_LIBNAME) and resolve the BZ2_* entry
   points used by this program into the function pointers declared
   by bzlib.h under BZ_IMPORT.
   Returns 0 on success, or if the library is already loaded, and
   -1 on failure after printing a message to stderr.  If a symbol
   cannot be resolved the library is unloaded again, so a failed
   call does not leak the module handle; the BZ2_* pointers must not
   be used after a failure.
*/
int BZ2DLLLoadLibrary(void)
{
   HINSTANCE hLib;

   if(BZ2DLLLoaded==1){return 0;}
   hLib=LoadLibrary(BZ2_LIBNAME);
   if(hLib == NULL){
      fprintf(stderr,"Can't load %s\n",BZ2_LIBNAME);
      return -1;
   }
   BZ2_bzlibVersion=GetProcAddress(hLib,"BZ2_bzlibVersion");
   BZ2_bzopen=GetProcAddress(hLib,"BZ2_bzopen");
   BZ2_bzdopen=GetProcAddress(hLib,"BZ2_bzdopen");
   BZ2_bzread=GetProcAddress(hLib,"BZ2_bzread");
   BZ2_bzwrite=GetProcAddress(hLib,"BZ2_bzwrite");
   BZ2_bzflush=GetProcAddress(hLib,"BZ2_bzflush");
   BZ2_bzclose=GetProcAddress(hLib,"BZ2_bzclose");
   BZ2_bzerror=GetProcAddress(hLib,"BZ2_bzerror");
   if (!BZ2_bzlibVersion || !BZ2_bzopen || !BZ2_bzdopen
       || !BZ2_bzread || !BZ2_bzwrite || !BZ2_bzflush
       || !BZ2_bzclose || !BZ2_bzerror) {
      fprintf(stderr,"GetProcAddress failed.\n");
      /* Do not keep a half-usable library mapped. */
      FreeLibrary(hLib);
      return -1;
   }
   BZ2DLLLoaded=1;
   BZ2DLLhLib=hLib;
   return 0;
}
/*
   Unload the bzip2 DLL if BZ2DLLLoadLibrary() loaded it, and mark
   it as not loaded.  Always returns 0.
*/
int BZ2DLLFreeLibrary(void)
{
   if(BZ2DLLLoaded==0){return 0;}
   FreeLibrary(BZ2DLLhLib);
   BZ2DLLLoaded=0;
   /* The function is declared int: return a defined value on this
      path too instead of falling off the end. */
   return 0;
}
#endif /* WIN32 */
/* Print the one-line command synopsis to stdout. */
void usage(void)
{
   static const char synopsis[] =
      "usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]";

   puts(synopsis);
}
/*
   minibz2 entry point.
   Options (introduced by '-' or '/'): 'd' selects decompression,
   a digit 1..9 sets the compression level (default 9); any other
   option letter prints the usage text and exits with status 1.
   The first remaining argument, if any, is the input file name
   (default stdin), the second the output file name (default stdout).
   Returns 0 on success and 1 if reading or writing the data fails;
   failures to open a file or stream exit with status 1.
*/
int main(int argc,char *argv[])
{
   int decompress = 0;
   int level = 9;
   int status = 0;
   char *fn_r = NULL;
   char *fn_w = NULL;

#ifdef _WIN32
   if(BZ2DLLLoadLibrary()<0){
      fprintf(stderr,"Loading of %s failed. Giving up.\n", BZ2_LIBNAME);
      exit(1);
   }
   printf("Loading of %s succeeded. Library version is %s.\n",
          BZ2_LIBNAME, BZ2_bzlibVersion() );
#endif

   /* "> 0" keeps the loop from running away if argc is 0 on entry. */
   while(++argv,--argc > 0){
      if(**argv =='-' || **argv=='/'){
         char *p;

         for(p=*argv+1;*p;p++){
            if(*p=='d'){
               decompress = 1;
            }else if('1'<=*p && *p<='9'){
               level = *p - '0';
            }else{
               usage();
               exit(1);
            }
         }
      }else{
         break;
      }
   }
   if(argc>=1){
      fn_r = *argv;
      argc--;argv++;
   }else{
      fn_r = NULL;
   }
   if(argc>=1){
      fn_w = *argv;
      argc--;argv++;
   }else{
      fn_w = NULL;
   }
   {
      int len;
      char buff[0x1000];
      char mode[10];

      if(decompress){
         BZFILE *BZ2fp_r = NULL;
         FILE *fp_w = NULL;

         if(fn_w){
            if((fp_w = fopen(fn_w,"wb"))==NULL){
               printf("can't open [%s]\n",fn_w);
               perror("reason:");
               exit(1);
            }
         }else{
            fp_w = stdout;
         }
         if((fn_r == NULL && (BZ2fp_r = BZ2_bzdopen(fileno(stdin),"rb"))==NULL)
            || (fn_r != NULL && (BZ2fp_r = BZ2_bzopen(fn_r,"rb"))==NULL)){
            printf("can't bz2openstream\n");
            exit(1);
         }
         while((len=BZ2_bzread(BZ2fp_r,buff,0x1000))>0){
            /* A short write means the output is incomplete. */
            if(fwrite(buff,1,len,fp_w) != (size_t)len){
               fprintf(stderr,"error writing decompressed data\n");
               status = 1;
               break;
            }
         }
         /* A negative length is a read/decode error, not end of data. */
         if(len<0){
            fprintf(stderr,"error reading compressed data\n");
            status = 1;
         }
         BZ2_bzclose(BZ2fp_r);
         if(fp_w != stdout) fclose(fp_w);
      }else{
         BZFILE *BZ2fp_w = NULL;
         FILE *fp_r = NULL;

         if(fn_r){
            if((fp_r = fopen(fn_r,"rb"))==NULL){
               printf("can't open [%s]\n",fn_r);
               perror("reason:");
               exit(1);
            }
         }else{
            fp_r = stdin;
         }
         /* Mode string is "w" followed by the level digit. */
         mode[0]='w';
         mode[1] = '0' + level;
         mode[2] = '\0';

         if((fn_w == NULL && (BZ2fp_w = BZ2_bzdopen(fileno(stdout),mode))==NULL)
            || (fn_w !=NULL && (BZ2fp_w = BZ2_bzopen(fn_w,mode))==NULL)){
            printf("can't bz2openstream\n");
            exit(1);
         }
         while((len=fread(buff,1,0x1000,fp_r))>0){
            if(BZ2_bzwrite(BZ2fp_w,buff,len)<0){
               fprintf(stderr,"error writing compressed data\n");
               status = 1;
               break;
            }
         }
         /* fread returns 0 both at end of file and on error. */
         if(ferror(fp_r)){
            fprintf(stderr,"error reading input data\n");
            status = 1;
         }
         BZ2_bzclose(BZ2fp_w);
         if(fp_r!=stdin)fclose(fp_r);
      }
   }
#ifdef _WIN32
   BZ2DLLFreeLibrary();
#endif
   return status;
}

View File

@ -1,93 +0,0 @@
# Microsoft Developer Studio Project File - Name="dlltest" - Package Owner=<4>
# Microsoft Developer Studio Generated Build File, Format Version 5.00
# ** 編集しないでください **
# TARGTYPE "Win32 (x86) Console Application" 0x0103
CFG=dlltest - Win32 Debug
!MESSAGE これは有効なメイクファイルではありません。 このプロジェクトをビルドするためには NMAKE を使用してください。
!MESSAGE [メイクファイルのエクスポート] コマンドを使用して実行してください
!MESSAGE
!MESSAGE NMAKE /f "dlltest.mak".
!MESSAGE
!MESSAGE NMAKE の実行時に構成を指定できます
!MESSAGE コマンド ライン上でマクロの設定を定義します。例:
!MESSAGE
!MESSAGE NMAKE /f "dlltest.mak" CFG="dlltest - Win32 Debug"
!MESSAGE
!MESSAGE 選択可能なビルド モード:
!MESSAGE
!MESSAGE "dlltest - Win32 Release" ("Win32 (x86) Console Application" 用)
!MESSAGE "dlltest - Win32 Debug" ("Win32 (x86) Console Application" 用)
!MESSAGE
# Begin Project
# PROP Scc_ProjName ""
# PROP Scc_LocalPath ""
CPP=cl.exe
RSC=rc.exe
!IF "$(CFG)" == "dlltest - Win32 Release"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir "Release"
# PROP BASE Intermediate_Dir "Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "Release"
# PROP Intermediate_Dir "Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# ADD BASE RSC /l 0x411 /d "NDEBUG"
# ADD RSC /l 0x411 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 /out:"minibz2.exe"
!ELSEIF "$(CFG)" == "dlltest - Win32 Debug"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "dlltest_"
# PROP BASE Intermediate_Dir "dlltest_"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir "dlltest_"
# PROP Intermediate_Dir "dlltest_"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# ADD BASE RSC /l 0x411 /d "_DEBUG"
# ADD RSC /l 0x411 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /out:"minibz2.exe" /pdbtype:sept
!ENDIF
# Begin Target
# Name "dlltest - Win32 Release"
# Name "dlltest - Win32 Debug"
# Begin Source File
SOURCE=.\bzlib.h
# End Source File
# Begin Source File
SOURCE=.\dlltest.c
# End Source File
# End Target
# End Project

View File

@ -4,6 +4,11 @@
/*--- huffman.c ---*/
/*-------------------------------------------------------------*/
/*-- Modified for use under GNO by Stephen Heumann --*/
#ifdef __ORCAC__
segment "bzip2";
#endif
/*--
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
@ -112,9 +117,15 @@ void BZ2_hbMakeCodeLengths ( UChar *len,
Int32 nNodes, nHeap, n1, n2, i, j, k;
Bool tooLong;
#ifdef __ORCAC__
static Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ];
static Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
static Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
#else
Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ];
Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
#endif
for (i = 0; i < alphaSize; i++)
weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;

View File

@ -1,27 +0,0 @@
LIBRARY LIBBZ2
DESCRIPTION "libbzip2: library for data compression"
EXPORTS
BZ2_bzCompressInit
BZ2_bzCompress
BZ2_bzCompressEnd
BZ2_bzDecompressInit
BZ2_bzDecompress
BZ2_bzDecompressEnd
BZ2_bzReadOpen
BZ2_bzReadClose
BZ2_bzReadGetUnused
BZ2_bzRead
BZ2_bzWriteOpen
BZ2_bzWrite
BZ2_bzWriteClose
BZ2_bzWriteClose64
BZ2_bzBuffToBuffCompress
BZ2_bzBuffToBuffDecompress
BZ2_bzlibVersion
BZ2_bzopen
BZ2_bzdopen
BZ2_bzread
BZ2_bzwrite
BZ2_bzflush
BZ2_bzclose
BZ2_bzerror

View File

@ -1,130 +0,0 @@
# Microsoft Developer Studio Project File - Name="libbz2" - Package Owner=<4>
# Microsoft Developer Studio Generated Build File, Format Version 5.00
# ** 編集しないでください **
# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102
CFG=libbz2 - Win32 Debug
!MESSAGE これは有効なメイクファイルではありません。 このプロジェクトをビルドするためには NMAKE を使用してください。
!MESSAGE [メイクファイルのエクスポート] コマンドを使用して実行してください
!MESSAGE
!MESSAGE NMAKE /f "libbz2.mak".
!MESSAGE
!MESSAGE NMAKE の実行時に構成を指定できます
!MESSAGE コマンド ライン上でマクロの設定を定義します。例:
!MESSAGE
!MESSAGE NMAKE /f "libbz2.mak" CFG="libbz2 - Win32 Debug"
!MESSAGE
!MESSAGE 選択可能なビルド モード:
!MESSAGE
!MESSAGE "libbz2 - Win32 Release" ("Win32 (x86) Dynamic-Link Library" 用)
!MESSAGE "libbz2 - Win32 Debug" ("Win32 (x86) Dynamic-Link Library" 用)
!MESSAGE
# Begin Project
# PROP Scc_ProjName ""
# PROP Scc_LocalPath ""
CPP=cl.exe
MTL=midl.exe
RSC=rc.exe
!IF "$(CFG)" == "libbz2 - Win32 Release"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir "Release"
# PROP BASE Intermediate_Dir "Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "Release"
# PROP Intermediate_Dir "Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /FD /c
# ADD CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /FD /c
# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /o NUL /win32
# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /o NUL /win32
# ADD BASE RSC /l 0x411 /d "NDEBUG"
# ADD RSC /l 0x411 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386 /out:"libbz2.dll"
!ELSEIF "$(CFG)" == "libbz2 - Win32 Debug"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "Debug"
# PROP BASE Intermediate_Dir "Debug"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir "Debug"
# PROP Intermediate_Dir "Debug"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /FD /c
# ADD CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /FD /c
# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /o NUL /win32
# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /o NUL /win32
# ADD BASE RSC /l 0x411 /d "_DEBUG"
# ADD RSC /l 0x411 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 /pdbtype:sept
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 /out:"libbz2.dll" /pdbtype:sept
!ENDIF
# Begin Target
# Name "libbz2 - Win32 Release"
# Name "libbz2 - Win32 Debug"
# Begin Source File
SOURCE=.\blocksort.c
# End Source File
# Begin Source File
SOURCE=.\bzlib.c
# End Source File
# Begin Source File
SOURCE=.\bzlib.h
# End Source File
# Begin Source File
SOURCE=.\bzlib_private.h
# End Source File
# Begin Source File
SOURCE=.\compress.c
# End Source File
# Begin Source File
SOURCE=.\crctable.c
# End Source File
# Begin Source File
SOURCE=.\decompress.c
# End Source File
# Begin Source File
SOURCE=.\huffman.c
# End Source File
# Begin Source File
SOURCE=.\libbz2.def
# End Source File
# Begin Source File
SOURCE=.\randtable.c
# End Source File
# End Target
# End Project

View File

@ -1,63 +0,0 @@
# Makefile for Microsoft Visual C++ 6.0
# usage: nmake -f makefile.msc
# K.M. Syring (syring@gsf.de)
# Fixed up by JRS for bzip2-0.9.5d release.
#
# Command lines in each description block are indented: NMAKE only
# treats a line as a command when it starts with whitespace.

CC=cl
CFLAGS= -DWIN32 -MD -Ox -D_FILE_OFFSET_BITS=64 -nologo

# Object files that make up the library.
OBJS= blocksort.obj \
      huffman.obj \
      crctable.obj \
      randtable.obj \
      compress.obj \
      decompress.obj \
      bzlib.obj

# Default target: build the library and the programs, then run the tests.
all: lib bzip2 test

# Builds both executables; bzip2 is linked against libbz2.lib.
bzip2: lib
	$(CC) $(CFLAGS) -o bzip2 bzip2.c libbz2.lib setargv.obj
	$(CC) $(CFLAGS) -o bzip2recover bzip2recover.c

# Archive the object files into libbz2.lib.
lib: $(OBJS)
	lib /out:libbz2.lib $(OBJS)

# Compress the three reference samples, decompress the three .bz2
# samples, and compare every result with its expected file using fc.
test: bzip2
	type words1
	.\\bzip2 -1 < sample1.ref > sample1.rb2
	.\\bzip2 -2 < sample2.ref > sample2.rb2
	.\\bzip2 -3 < sample3.ref > sample3.rb2
	.\\bzip2 -d < sample1.bz2 > sample1.tst
	.\\bzip2 -d < sample2.bz2 > sample2.tst
	.\\bzip2 -ds < sample3.bz2 > sample3.tst
	@echo All six of the fc's should find no differences.
	@echo If fc finds an error on sample3.bz2, this could be
	@echo because WinZip's 'TAR file smart CR/LF conversion'
	@echo is too clever for its own good. Disable this option.
	@echo The correct size for sample3.ref is 120,244. If it
	@echo is 150,251, WinZip has messed it up.
	fc sample1.bz2 sample1.rb2
	fc sample2.bz2 sample2.rb2
	fc sample3.bz2 sample3.rb2
	fc sample1.tst sample1.ref
	fc sample2.tst sample2.ref
	fc sample3.tst sample3.ref

# Remove everything produced by the build and by the tests.
clean:
	del *.obj
	del libbz2.lib
	del bzip2.exe
	del bzip2recover.exe
	del sample1.rb2
	del sample2.rb2
	del sample3.rb2
	del sample1.tst
	del sample2.tst
	del sample3.tst

# Inference rule: compile each .c file into the matching .obj file.
.c.obj:
	$(CC) $(CFLAGS) -c $*.c -o $*.obj

View File

@ -1,117 +0,0 @@
<HTML>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<!-- Created on January, 5 2002 by texi2html 1.64 -->
<!--
Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author)
Karl Berry <karl@freefriends.org>
Olaf Bachmann <obachman@mathematik.uni-kl.de>
and many others.
Maintained by: Olaf Bachmann <obachman@mathematik.uni-kl.de>
Send bugs and suggestions to <texi2html@mathematik.uni-kl.de>
-->
<HEAD>
<TITLE>Untitled Document: Untitled Document</TITLE>
<META NAME="description" CONTENT="Untitled Document: Untitled Document">
<META NAME="keywords" CONTENT="Untitled Document: Untitled Document">
<META NAME="resource-type" CONTENT="document">
<META NAME="distribution" CONTENT="global">
<META NAME="Generator" CONTENT="texi2html 1.64">
</HEAD>
<BODY LANG="" BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#800080" ALINK="#FF0000">
<A NAME="SEC_Top"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H1>Untitled Document</H1></P><P>
The following text is the License for this software. You should
find it identical to that contained in the file LICENSE in the
source distribution.
</P><P>
<B>------------------ START OF THE LICENSE ------------------</B>
</P><P>
This program, <CODE>bzip2</CODE>,
and associated library <CODE>libbzip2</CODE>, are
Copyright (C) 1996-2002 Julian R Seward. All rights reserved.
</P><P>
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
<UL>
<LI>
Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
<LI>
The origin of this software must not be misrepresented; you must
not claim that you wrote the original software. If you use this
software in a product, an acknowledgment in the product
documentation would be appreciated but is not required.
<LI>
Altered source versions must be plainly marked as such, and must
not be misrepresented as being the original software.
<LI>
The name of the author may not be used to endorse or promote
products derived from this software without specific prior written
permission.
</UL>
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
<P>
Julian Seward, Cambridge, UK.
</P><P>
<CODE>jseward@acm.org</CODE>
</P><P>
<CODE>bzip2</CODE>/<CODE>libbzip2</CODE> version 1.0.2 of 30 December 2001.
</P><P>
<B>------------------ END OF THE LICENSE ------------------</B>
</P><P>
Web sites:
</P><P>
<CODE>http://sources.redhat.com/bzip2</CODE>
</P><P>
<CODE>http://www.cacheprof.org</CODE>
</P><P>
PATENTS: To the best of my knowledge, <CODE>bzip2</CODE> does not use any patented
algorithms. However, I do not have the resources available to carry out
a full patent search. Therefore I cannot give any guarantee of the
above statement.
</P><P>
<HR SIZE=1>
<BR>
<FONT SIZE="-1">
This document was generated
by <I>Julian Seward</I> on <I>January 5, 2002</I>
using <A HREF="http://www.mathematik.uni-kl.de/~obachman/Texi2html"><I>texi2html</I></A>
</FONT>
</BODY>
</HTML>

Binary file not shown.

3991
manual.ps

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,81 +0,0 @@
<HTML>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<!-- Created on January, 5 2002 by texi2html 1.64 -->
<!--
Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author)
Karl Berry <karl@freefriends.org>
Olaf Bachmann <obachman@mathematik.uni-kl.de>
and many others.
Maintained by: Olaf Bachmann <obachman@mathematik.uni-kl.de>
Send bugs and suggestions to <texi2html@mathematik.uni-kl.de>
-->
<HEAD>
<TITLE>Untitled Document: 1. Introduction</TITLE>
<META NAME="description" CONTENT="Untitled Document: 1. Introduction">
<META NAME="keywords" CONTENT="Untitled Document: 1. Introduction">
<META NAME="resource-type" CONTENT="document">
<META NAME="distribution" CONTENT="global">
<META NAME="Generator" CONTENT="texi2html 1.64">
</HEAD>
<BODY LANG="" BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#800080" ALINK="#FF0000">
<A NAME="SEC1"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC2"> &gt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H1> 1. Introduction </H1>
<!--docid::SEC1::-->
<P>
<CODE>bzip2</CODE> compresses files using the Burrows-Wheeler
block-sorting text compression algorithm, and Huffman coding.
Compression is generally considerably better than that
achieved by more conventional LZ77/LZ78-based compressors,
and approaches the performance of the PPM family of statistical compressors.
</P><P>
<CODE>bzip2</CODE> is built on top of <CODE>libbzip2</CODE>, a flexible library
for handling compressed data in the <CODE>bzip2</CODE> format. This manual
describes both how to use the program and
how to work with the library interface. Most of the
manual is devoted to this library, not the program,
which is good news if your interest is only in the program.
</P><P>
Chapter 2 describes how to use <CODE>bzip2</CODE>; this is the only part
you need to read if you just want to know how to operate the program.
Chapter 3 describes the programming interfaces in detail, and
Chapter 4 records some miscellaneous notes which I thought
ought to be recorded somewhere.
</P><P>
<HR SIZE="6">
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<BR>
<FONT SIZE="-1">
This document was generated
by <I>Julian Seward</I> on <I>January 5, 2002</I>
using <A HREF="http://www.mathematik.uni-kl.de/~obachman/Texi2html"><I>texi2html</I></A>
</FONT>
</BODY>
</HTML>

View File

@ -1,579 +0,0 @@
<HTML>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<!-- Created on January, 5 2002 by texi2html 1.64 -->
<!--
Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author)
Karl Berry <karl@freefriends.org>
Olaf Bachmann <obachman@mathematik.uni-kl.de>
and many others.
Maintained by: Olaf Bachmann <obachman@mathematik.uni-kl.de>
Send bugs and suggestions to <texi2html@mathematik.uni-kl.de>
-->
<HEAD>
<TITLE>Untitled Document: 2. How to use bzip2</TITLE>
<META NAME="description" CONTENT="Untitled Document: 2. How to use bzip2">
<META NAME="keywords" CONTENT="Untitled Document: 2. How to use bzip2">
<META NAME="resource-type" CONTENT="document">
<META NAME="distribution" CONTENT="global">
<META NAME="Generator" CONTENT="texi2html 1.64">
</HEAD>
<BODY LANG="" BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#800080" ALINK="#FF0000">
<A NAME="SEC2"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_1.html#SEC1"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC3"> &gt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H1> 2. How to use <CODE>bzip2</CODE> </H1>
<!--docid::SEC2::-->
<P>
This chapter contains a copy of the <CODE>bzip2</CODE> man page,
and nothing else.
</P><P>
<BLOCKQUOTE>
<P>
<HR SIZE="6">
<A NAME="SEC3"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC2"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC4"> &gt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H4> NAME </H4>
<!--docid::SEC3::-->
<UL>
<LI><CODE>bzip2</CODE>, <CODE>bunzip2</CODE>
- a block-sorting file compressor, v1.0.2
<LI><CODE>bzcat</CODE>
- decompresses files to stdout
<LI><CODE>bzip2recover</CODE>
- recovers data from damaged bzip2 files
</UL>
<P>
<HR SIZE="6">
<A NAME="SEC4"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC3"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC5"> &gt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H4> SYNOPSIS </H4>
<!--docid::SEC4::-->
<UL>
<LI><CODE>bzip2</CODE> [ -cdfkqstvzVL123456789 ] [ filenames ... ]
<LI><CODE>bunzip2</CODE> [ -fkvsVL ] [ filenames ... ]
<LI><CODE>bzcat</CODE> [ -s ] [ filenames ... ]
<LI><CODE>bzip2recover</CODE> filename
</UL>
<P>
<HR SIZE="6">
<A NAME="SEC5"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC4"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC6"> &gt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H4> DESCRIPTION </H4>
<!--docid::SEC5::-->
<P>
<CODE>bzip2</CODE> compresses files using the Burrows-Wheeler block sorting
text compression algorithm, and Huffman coding. Compression is
generally considerably better than that achieved by more conventional
LZ77/LZ78-based compressors, and approaches the performance of the PPM
family of statistical compressors.
</P><P>
The command-line options are deliberately very similar to those of GNU
<CODE>gzip</CODE>, but they are not identical.
</P><P>
<CODE>bzip2</CODE> expects a list of file names to accompany the command-line
flags. Each file is replaced by a compressed version of itself, with
the name <CODE>original_name.bz2</CODE>. Each compressed file has the same
modification date, permissions, and, when possible, ownership as the
corresponding original, so that these properties can be correctly
restored at decompression time. File name handling is naive in the
sense that there is no mechanism for preserving original file names,
permissions, ownerships or dates in filesystems which lack these
concepts, or have serious file name length restrictions, such as MS-DOS.
</P><P>
<CODE>bzip2</CODE> and <CODE>bunzip2</CODE> will by default not overwrite existing
files. If you want this to happen, specify the <CODE>-f</CODE> flag.
</P><P>
If no file names are specified, <CODE>bzip2</CODE> compresses from standard
input to standard output. In this case, <CODE>bzip2</CODE> will decline to
write compressed output to a terminal, as this would be entirely
incomprehensible and therefore pointless.
</P><P>
<CODE>bunzip2</CODE> (or <CODE>bzip2 -d</CODE>) decompresses all
specified files. Files which were not created by <CODE>bzip2</CODE>
will be detected and ignored, and a warning issued.
<CODE>bzip2</CODE> attempts to guess the filename for the decompressed file
from that of the compressed file as follows:
<UL>
<LI><CODE>filename.bz2 </CODE> becomes <CODE>filename</CODE>
<LI><CODE>filename.bz </CODE> becomes <CODE>filename</CODE>
<LI><CODE>filename.tbz2</CODE> becomes <CODE>filename.tar</CODE>
<LI><CODE>filename.tbz </CODE> becomes <CODE>filename.tar</CODE>
<LI><CODE>anyothername </CODE> becomes <CODE>anyothername.out</CODE>
</UL>
If the file does not end in one of the recognised endings,
<CODE>.bz2</CODE>, <CODE>.bz</CODE>,
<CODE>.tbz2</CODE> or <CODE>.tbz</CODE>, <CODE>bzip2</CODE> complains that it cannot
guess the name of the original file, and uses the original name
with <CODE>.out</CODE> appended.
<P>
As with compression, supplying no
filenames causes decompression from standard input to standard output.
</P><P>
<CODE>bunzip2</CODE> will correctly decompress a file which is the
concatenation of two or more compressed files. The result is the
concatenation of the corresponding uncompressed files. Integrity
testing (<CODE>-t</CODE>) of concatenated compressed files is also supported.
</P><P>
You can also compress or decompress files to the standard output by
giving the <CODE>-c</CODE> flag. Multiple files may be compressed and
decompressed like this. The resulting outputs are fed sequentially to
stdout. Compression of multiple files in this manner generates a stream
containing multiple compressed file representations. Such a stream
can be decompressed correctly only by <CODE>bzip2</CODE> version 0.9.0 or
later. Earlier versions of <CODE>bzip2</CODE> will stop after decompressing
the first file in the stream.
</P><P>
<CODE>bzcat</CODE> (or <CODE>bzip2 -dc</CODE>) decompresses all specified files to
the standard output.
</P><P>
<CODE>bzip2</CODE> will read arguments from the environment variables
<CODE>BZIP2</CODE> and <CODE>BZIP</CODE>, in that order, and will process them
before any arguments read from the command line. This gives a
convenient way to supply default arguments.
</P><P>
Compression is always performed, even if the compressed file is slightly
larger than the original. Files of less than about one hundred bytes
tend to get larger, since the compression mechanism has a constant
overhead in the region of 50 bytes. Random data (including the output
of most file compressors) is coded at about 8.05 bits per byte, giving
an expansion of around 0.5%.
</P><P>
As a self-check for your protection, <CODE>bzip2</CODE> uses 32-bit CRCs to
make sure that the decompressed version of a file is identical to the
original. This guards against corruption of the compressed data, and
against undetected bugs in <CODE>bzip2</CODE> (hopefully very unlikely). The
chance of data corruption going undetected is microscopic, about one
chance in four billion for each file processed. Be aware, though, that
the check occurs upon decompression, so it can only tell you that
something is wrong. It can't help you recover the original uncompressed
data. You can use <CODE>bzip2recover</CODE> to try to recover data from
damaged files.
</P><P>
Return values: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, etc.), 2 to indicate a corrupt
compressed file, 3 for an internal consistency error (eg, bug) which
caused <CODE>bzip2</CODE> to panic.
</P><P>
<HR SIZE="6">
<A NAME="SEC6"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC5"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC7"> &gt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H4> OPTIONS </H4>
<!--docid::SEC6::-->
<DL COMPACT>
<DT><CODE>-c --stdout</CODE>
<DD>Compress or decompress to standard output.
<DT><CODE>-d --decompress</CODE>
<DD>Force decompression. <CODE>bzip2</CODE>, <CODE>bunzip2</CODE> and <CODE>bzcat</CODE> are
really the same program, and the decision about what actions to take is
done on the basis of which name is used. This flag overrides that
mechanism, and forces bzip2 to decompress.
<DT><CODE>-z --compress</CODE>
<DD>The complement to <CODE>-d</CODE>: forces compression, regardless of the
invocation name.
<DT><CODE>-t --test</CODE>
<DD>Check integrity of the specified file(s), but don't decompress them.
This really performs a trial decompression and throws away the result.
<DT><CODE>-f --force</CODE>
<DD>Force overwrite of output files. Normally, <CODE>bzip2</CODE> will not overwrite
existing output files. Also forces <CODE>bzip2</CODE> to break hard links
to files, which it otherwise wouldn't do.
<P>
<CODE>bzip2</CODE> normally declines to decompress files which don't have the
correct magic header bytes. If forced (<CODE>-f</CODE>), however, it will
pass such files through unmodified. This is how GNU <CODE>gzip</CODE>
behaves.
<DT><CODE>-k --keep</CODE>
<DD>Keep (don't delete) input files during compression
or decompression.
<DT><CODE>-s --small</CODE>
<DD>Reduce memory usage, for compression, decompression and testing. Files
are decompressed and tested using a modified algorithm which only
requires 2.5 bytes per block byte. This means any file can be
decompressed in 2300k of memory, albeit at about half the normal speed.
<P>
During compression, <CODE>-s</CODE> selects a block size of 200k, which limits
memory use to around the same figure, at the expense of your compression
ratio. In short, if your machine is low on memory (8 megabytes or
less), use -s for everything. See MEMORY MANAGEMENT below.
<DT><CODE>-q --quiet</CODE>
<DD>Suppress non-essential warning messages. Messages pertaining to
I/O errors and other critical events will not be suppressed.
<DT><CODE>-v --verbose</CODE>
<DD>Verbose mode -- show the compression ratio for each file processed.
Further <CODE>-v</CODE>'s increase the verbosity level, spewing out lots of
information which is primarily of interest for diagnostic purposes.
<DT><CODE>-L --license -V --version</CODE>
<DD>Display the software version, license terms and conditions.
<DT><CODE>-1 (or --fast) to -9 (or --best)</CODE>
<DD>Set the block size to 100 k, 200 k .. 900 k when compressing. Has no
effect when decompressing. See MEMORY MANAGEMENT below.
The <CODE>--fast</CODE> and <CODE>--best</CODE> aliases are primarily for GNU
<CODE>gzip</CODE> compatibility. In particular, <CODE>--fast</CODE> doesn't make
things significantly faster. And <CODE>--best</CODE> merely selects the
default behaviour.
<DT><CODE>--</CODE>
<DD>Treats all subsequent arguments as file names, even if they start
with a dash. This is so you can handle files with names beginning
with a dash, for example: <CODE>bzip2 -- -myfilename</CODE>.
<DT><CODE>--repetitive-fast</CODE>
<DT><CODE>--repetitive-best</CODE>
<DD>These flags are redundant in versions 0.9.5 and above. They provided
some coarse control over the behaviour of the sorting algorithm in
earlier versions, which was sometimes useful. 0.9.5 and above have an
improved algorithm which renders these flags irrelevant.
</DL>
<P>
<HR SIZE="6">
<A NAME="SEC7"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC6"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC8"> &gt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H4> MEMORY MANAGEMENT </H4>
<!--docid::SEC7::-->
<P>
<CODE>bzip2</CODE> compresses large files in blocks. The block size affects
both the compression ratio achieved, and the amount of memory needed for
compression and decompression. The flags <CODE>-1</CODE> through <CODE>-9</CODE>
specify the block size to be 100,000 bytes through 900,000 bytes (the
default) respectively. At decompression time, the block size used for
compression is read from the header of the compressed file, and
<CODE>bunzip2</CODE> then allocates itself just enough memory to decompress
the file. Since block sizes are stored in compressed files, it follows
that the flags <CODE>-1</CODE> to <CODE>-9</CODE> are irrelevant to and so ignored
during decompression.
</P><P>
Compression and decompression requirements, in bytes, can be estimated
as:
<TABLE><tr><td>&nbsp;</td><td class=example><pre> Compression: 400k + ( 8 x block size )
Decompression: 100k + ( 4 x block size ), or
100k + ( 2.5 x block size )
</pre></td></tr></table>Larger block sizes give rapidly diminishing marginal returns. Most of
the compression comes from the first two or three hundred k of block
size, a fact worth bearing in mind when using <CODE>bzip2</CODE> on small machines.
It is also important to appreciate that the decompression memory
requirement is set at compression time by the choice of block size.
</P><P>
For files compressed with the default 900k block size, <CODE>bunzip2</CODE>
will require about 3700 kbytes to decompress. To support decompression
of any file on a 4 megabyte machine, <CODE>bunzip2</CODE> has an option to
decompress using approximately half this amount of memory, about 2300
kbytes. Decompression speed is also halved, so you should use this
option only where necessary. The relevant flag is <CODE>-s</CODE>.
</P><P>
In general, try and use the largest block size memory constraints allow,
since that maximises the compression achieved. Compression and
decompression speed are virtually unaffected by block size.
</P><P>
Another significant point applies to files which fit in a single block
-- that means most files you'd encounter using a large block size. The
amount of real memory touched is proportional to the size of the file,
since the file is smaller than a block. For example, compressing a file
20,000 bytes long with the flag <CODE>-9</CODE> will cause the compressor to
allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560
kbytes of it. Similarly, the decompressor will allocate 3700k but only
touch 100k + 20000 * 4 = 180 kbytes.
</P><P>
Here is a table which summarises the maximum memory usage for different
block sizes. Also recorded is the total compressed size for 14 files of
the Calgary Text Compression Corpus totalling 3,141,622 bytes. This
column gives some feel for how compression varies with block size.
These figures tend to understate the advantage of larger block sizes for
larger files, since the Corpus is dominated by smaller files.
<TABLE><tr><td>&nbsp;</td><td class=example><pre> Compress Decompress Decompress Corpus
Flag usage usage -s usage Size
-1 1200k 500k 350k 914704
-2 2000k 900k 600k 877703
-3 2800k 1300k 850k 860338
-4 3600k 1700k 1100k 846899
-5 4400k 2100k 1350k 845160
-6 5200k 2500k 1600k 838626
-7 6100k 2900k 1850k 834096
-8 6800k 3300k 2100k 828642
-9 7600k 3700k 2350k 828642
</pre></td></tr></table></P><P>
<HR SIZE="6">
<A NAME="SEC8"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC7"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC9"> &gt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H4> RECOVERING DATA FROM DAMAGED FILES </H4>
<!--docid::SEC8::-->
<P>
<CODE>bzip2</CODE> compresses files in blocks, usually 900kbytes long. Each
block is handled independently. If a media or transmission error causes
a multi-block <CODE>.bz2</CODE> file to become damaged, it may be possible to
recover data from the undamaged blocks in the file.
</P><P>
The compressed representation of each block is delimited by a 48-bit
pattern, which makes it possible to find the block boundaries with
reasonable certainty. Each block also carries its own 32-bit CRC, so
damaged blocks can be distinguished from undamaged ones.
</P><P>
<CODE>bzip2recover</CODE> is a simple program whose purpose is to search for
blocks in <CODE>.bz2</CODE> files, and write each block out into its own
<CODE>.bz2</CODE> file. You can then use <CODE>bzip2 -t</CODE> to test the
integrity of the resulting files, and decompress those which are
undamaged.
</P><P>
<CODE>bzip2recover</CODE>
takes a single argument, the name of the damaged file, and writes a
number of files <CODE>rec00001file.bz2</CODE>, <CODE>rec00002file.bz2</CODE>, etc,
containing the extracted blocks. The output filenames are designed so
that the use of wildcards in subsequent processing -- for example,
<CODE>bzip2 -dc rec*file.bz2 &#62; recovered_data</CODE> -- processes the files in
the correct order.
</P><P>
<CODE>bzip2recover</CODE> should be of most use dealing with large <CODE>.bz2</CODE>
files, as these will contain many blocks. It is clearly futile to use
it on damaged single-block files, since a damaged block cannot be
recovered. If you wish to minimise any potential data loss through
media or transmission errors, you might consider compressing with a
smaller block size.
</P><P>
<HR SIZE="6">
<A NAME="SEC9"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC8"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC10"> &gt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H4> PERFORMANCE NOTES </H4>
<!--docid::SEC9::-->
<P>
The sorting phase of compression gathers together similar strings in the
file. Because of this, files containing very long runs of repeated
symbols, like "aabaabaabaab ..." (repeated several hundred times) may
compress more slowly than normal. Versions 0.9.5 and above fare much
better than previous versions in this respect. The ratio between
worst-case and average-case compression time is in the region of 10:1.
For previous versions, this figure was more like 100:1. You can use the
<CODE>-vvvv</CODE> option to monitor progress in great detail, if you want.
</P><P>
Decompression speed is unaffected by these phenomena.
</P><P>
<CODE>bzip2</CODE> usually allocates several megabytes of memory to operate
in, and then charges all over it in a fairly random fashion. This means
that performance, both for compressing and decompressing, is largely
determined by the speed at which your machine can service cache misses.
Because of this, small changes to the code to reduce the miss rate have
been observed to give disproportionately large performance improvements.
I imagine <CODE>bzip2</CODE> will perform best on machines with very large
caches.
</P><P>
<HR SIZE="6">
<A NAME="SEC10"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC9"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC11"> &gt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H4> CAVEATS </H4>
<!--docid::SEC10::-->
<P>
I/O error messages are not as helpful as they could be. <CODE>bzip2</CODE>
tries hard to detect I/O errors and exit cleanly, but the details of
what the problem is sometimes seem rather misleading.
</P><P>
This manual page pertains to version 1.0.2 of <CODE>bzip2</CODE>. Compressed
data created by this version is entirely forwards and backwards
compatible with the previous public releases, versions 0.1pl2, 0.9.0,
0.9.5, 1.0.0 and 1.0.1, but with the following exception: 0.9.0 and
above can correctly decompress multiple concatenated compressed files.
0.1pl2 cannot do this; it will stop after decompressing just the first
file in the stream.
</P><P>
<CODE>bzip2recover</CODE> versions prior to this one, 1.0.2, used 32-bit
integers to represent bit positions in compressed files, so it could not
handle compressed files more than 512 megabytes long. Version 1.0.2 and
above use 64-bit ints on some platforms which support them (GNU
supported targets, and Windows). To establish whether or not
<CODE>bzip2recover</CODE> was built with such a limitation, run it without
arguments. In any event you can build yourself an unlimited version if
you can recompile it with <CODE>MaybeUInt64</CODE> set to be an unsigned
64-bit integer.
</P><P>
<HR SIZE="6">
<A NAME="SEC11"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC10"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_3.html#SEC12"> &gt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H4> AUTHOR </H4>
<!--docid::SEC11::-->
Julian Seward, <CODE>jseward@acm.org</CODE>.
<P>
<CODE>http://sources.redhat.com/bzip2</CODE>
</P><P>
The ideas embodied in <CODE>bzip2</CODE> are due to (at least) the following
people: Michael Burrows and David Wheeler (for the block sorting
transformation), David Wheeler (again, for the Huffman coder), Peter
Fenwick (for the structured coding model in the original <CODE>bzip</CODE>,
and many refinements), and Alistair Moffat, Radford Neal and Ian Witten
(for the arithmetic coder in the original <CODE>bzip</CODE>). I am much
indebted for their help, support and advice. See the manual in the
source distribution for pointers to sources of documentation. Christian
von Roques encouraged me to look for faster sorting algorithms, so as to
speed up compression. Bela Lubkin encouraged me to improve the
worst-case compression performance. The <CODE>bz*</CODE> scripts are derived
from those of GNU <CODE>gzip</CODE>. Many people sent patches, helped with
portability problems, lent machines, gave advice and were generally
helpful.
</P><P>
</BLOCKQUOTE>
<HR SIZE="6">
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<BR>
<FONT SIZE="-1">
This document was generated
by <I>Julian Seward</I> on <I>January, 5 2002</I>
using <A HREF="http://www.mathematik.uni-kl.de/~obachman/Texi2html"><I>texi2html</I></A>
</FONT>
</BODY>
</HTML>

File diff suppressed because it is too large Load Diff

View File

@ -1,530 +0,0 @@
<HTML>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<!-- Created on January, 5 2002 by texi2html 1.64 -->
<!--
Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author)
Karl Berry <karl@freefriends.org>
Olaf Bachmann <obachman@mathematik.uni-kl.de>
and many others.
Maintained by: Olaf Bachmann <obachman@mathematik.uni-kl.de>
Send bugs and suggestions to <texi2html@mathematik.uni-kl.de>
-->
<HEAD>
<TITLE>Untitled Document: 4. Miscellanea</TITLE>
<META NAME="description" CONTENT="Untitled Document: 4. Miscellanea">
<META NAME="keywords" CONTENT="Untitled Document: 4. Miscellanea">
<META NAME="resource-type" CONTENT="document">
<META NAME="distribution" CONTENT="global">
<META NAME="Generator" CONTENT="texi2html 1.64">
</HEAD>
<BODY LANG="" BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#800080" ALINK="#FF0000">
<A NAME="SEC43"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_3.html#SEC42"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC44"> &gt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H1> 4. Miscellanea </H1>
<!--docid::SEC43::-->
<P>
These are just some random thoughts of mine. Your mileage may
vary.
</P><P>
<HR SIZE="6">
<A NAME="SEC44"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC43"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC45"> &gt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H2> 4.1 Limitations of the compressed file format </H2>
<!--docid::SEC44::-->
<CODE>bzip2-1.0</CODE>, <CODE>0.9.5</CODE> and <CODE>0.9.0</CODE>
use exactly the same file format as the previous
version, <CODE>bzip2-0.1</CODE>. This decision was made in the interests of
stability. Creating yet another incompatible compressed file format
would create further confusion and disruption for users.
<P>
Nevertheless, this is not a painless decision. Development
work since the release of <CODE>bzip2-0.1</CODE> in August 1997
has shown complexities in the file format which slow down
decompression and, in retrospect, are unnecessary. These are:
<UL>
<LI>The run-length encoder, which is the first of the
compression transformations, is entirely irrelevant.
The original purpose was to protect the sorting algorithm
from the very worst case input: a string of repeated
symbols. But algorithm steps Q6a and Q6b in the original
Burrows-Wheeler technical report (SRC-124) show how
repeats can be handled without difficulty in block
sorting.
<LI>The randomisation mechanism doesn't really need to be
there. Udi Manber and Gene Myers published a suffix
array construction algorithm a few years back, which
can be employed to sort any block, no matter how
repetitive, in O(N log N) time. Subsequent work by
Kunihiko Sadakane has produced a derivative O(N (log N)^2)
algorithm which usually outperforms the Manber-Myers
algorithm.
<P>
I could have changed to Sadakane's algorithm, but I find
it to be slower than <CODE>bzip2</CODE>'s existing algorithm for
most inputs, and the randomisation mechanism protects
adequately against bad cases. I didn't think it was
a good tradeoff to make. Partly this is due to the fact
that I was not flooded with email complaints about
<CODE>bzip2-0.1</CODE>'s performance on repetitive data, so
perhaps it isn't a problem for real inputs.
</P><P>
Probably the best long-term solution,
and the one I have incorporated into 0.9.5 and above,
is to use the existing sorting
algorithm initially, and fall back to an O(N (log N)^2)
algorithm if the standard algorithm gets into difficulties.
<LI>The compressed file format was never designed to be
handled by a library, and I have had to jump through
some hoops to produce an efficient implementation of
decompression. It's a bit hairy. Try passing
<CODE>decompress.c</CODE> through the C preprocessor
and you'll see what I mean. Much of this complexity
could have been avoided if the compressed size of
each block of data was recorded in the data stream.
<LI>An Adler-32 checksum, rather than a CRC32 checksum,
would be faster to compute.
</UL>
It would be fair to say that the <CODE>bzip2</CODE> format was frozen
before I properly and fully understood the performance
consequences of doing so.
<P>
Improvements which I was able to incorporate into
0.9.0, despite using the same file format, are:
<UL>
<LI>Single array implementation of the inverse BWT. This
significantly speeds up decompression, presumably
because it reduces the number of cache misses.
<LI>Faster inverse MTF transform for large MTF values. The
new implementation is based on the notion of sliding blocks
of values.
<LI><CODE>bzip2-0.9.0</CODE> now reads and writes files with <CODE>fread</CODE>
and <CODE>fwrite</CODE>; version 0.1 used <CODE>putc</CODE> and <CODE>getc</CODE>.
Duh! Well, you live and learn.
<P>
</UL>
Further ahead, it would be nice
to be able to do random access into files. This will
require some careful design of compressed file formats.
<P>
<HR SIZE="6">
<A NAME="SEC45"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC44"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC46"> &gt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H2> 4.2 Portability issues </H2>
<!--docid::SEC45::-->
After some consideration, I have decided not to use
GNU <CODE>autoconf</CODE> to configure 0.9.5 or 1.0.
<P>
<CODE>autoconf</CODE>, admirable and wonderful though it is,
mainly assists with portability problems between Unix-like
platforms. But <CODE>bzip2</CODE> doesn't have much in the way
of portability problems on Unix; most of the difficulties appear
when porting to the Mac, or to Microsoft's operating systems.
<CODE>autoconf</CODE> doesn't help in those cases, and brings in a
whole load of new complexity.
</P><P>
Most people should be able to compile the library and program
under Unix straight out-of-the-box, so to speak, especially
if you have a version of GNU C available.
</P><P>
There are a couple of <CODE>__inline__</CODE> directives in the code. GNU C
(<CODE>gcc</CODE>) should be able to handle them. If you're not using
GNU C, your C compiler shouldn't see them at all.
If your compiler does, for some reason, see them and doesn't
like them, just <CODE>#define</CODE> <CODE>__inline__</CODE> to be <CODE>/* */</CODE>. One
easy way to do this is to compile with the flag <CODE>-D__inline__=</CODE>,
which should be understood by most Unix compilers.
</P><P>
If you still have difficulties, try compiling with the macro
<CODE>BZ_STRICT_ANSI</CODE> defined. This should enable you to build the
library in a strictly ANSI compliant environment. Building the program
itself like this is dangerous and not supported, since you remove
<CODE>bzip2</CODE>'s checks against compressing directories, symbolic links,
devices, and other not-really-a-file entities. This could cause
filesystem corruption!
</P><P>
One other thing: if you create a <CODE>bzip2</CODE> binary for public
distribution, please try and link it statically (<CODE>gcc -static</CODE>). This
avoids all sorts of library-version issues that others may encounter
later on.
</P><P>
If you build <CODE>bzip2</CODE> on Win32, you must set <CODE>BZ_UNIX</CODE> to 0 and
<CODE>BZ_LCCWIN32</CODE> to 1, in the file <CODE>bzip2.c</CODE>, before compiling.
Otherwise the resulting binary won't work correctly.
</P><P>
<HR SIZE="6">
<A NAME="SEC46"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC45"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC47"> &gt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H2> 4.3 Reporting bugs </H2>
<!--docid::SEC46::-->
I tried pretty hard to make sure <CODE>bzip2</CODE> is
bug free, both by design and by testing. Hopefully
you'll never need to read this section for real.
<P>
Nevertheless, if <CODE>bzip2</CODE> dies with a segmentation
fault, a bus error or an internal assertion failure, it
will ask you to email me a bug report. Experience with
version 0.1 shows that almost all these problems can
be traced to either compiler bugs or hardware problems.
<UL>
<LI>
Recompile the program with no optimisation, and see if it
works. And/or try a different compiler.
I heard all sorts of stories about various flavours
of GNU C (and other compilers) generating bad code for
<CODE>bzip2</CODE>, and I've run across two such examples myself.
<P>
2.7.X versions of GNU C are known to generate bad code from
time to time, at high optimisation levels.
If you get problems, try using the flags
<CODE>-O2</CODE> <CODE>-fomit-frame-pointer</CODE> <CODE>-fno-strength-reduce</CODE>.
You should specifically <EM>not</EM> use <CODE>-funroll-loops</CODE>.
</P><P>
You may notice that the Makefile runs six tests as part of
the build process. If the program passes all of these, it's
a pretty good (but not 100%) indication that the compiler has
done its job correctly.
<LI>
If <CODE>bzip2</CODE> crashes randomly, and the crashes are not
repeatable, you may have a flaky memory subsystem. <CODE>bzip2</CODE>
really hammers your memory hierarchy, and if it's a bit marginal,
you may get these problems. Ditto if your disk or I/O subsystem
is slowly failing. Yup, this really does happen.
<P>
Try using a different machine of the same type, and see if
you can repeat the problem.
<LI>This isn't really a bug, but ... If <CODE>bzip2</CODE> tells
you your file is corrupted on decompression, and you
obtained the file via FTP, there is a possibility that you
forgot to tell FTP to do a binary mode transfer. That absolutely
will cause the file to be non-decompressible. You'll have to transfer
it again.
</UL>
<P>
If you've incorporated <CODE>libbzip2</CODE> into your own program
and are getting problems, please, please, please, check that the
parameters you are passing in calls to the library are
correct, and in accordance with what the documentation says
is allowable. I have tried to make the library robust against
such problems, but I'm sure I haven't succeeded.
</P><P>
Finally, if the above comments don't help, you'll have to send
me a bug report. Now, it's just amazing how many people will
send me a bug report saying something like
<TABLE><tr><td>&nbsp;</td><td class=display><pre style="font-family: serif"> bzip2 crashed with segmentation fault on my machine
</pre></td></tr></table>and absolutely nothing else. Needless to say, such a report
is <EM>totally, utterly, completely and comprehensively 100% useless;
a waste of your time, my time, and net bandwidth</EM>.
With no details at all, there's no way I can possibly begin
to figure out what the problem is.
</P><P>
The rules of the game are: facts, facts, facts. Don't omit
them because "oh, they won't be relevant". At the bare
minimum:
<TABLE><tr><td>&nbsp;</td><td class=display><pre style="font-family: serif"> Machine type. Operating system version.
Exact version of <CODE>bzip2</CODE> (do <CODE>bzip2 -V</CODE>).
Exact version of the compiler used.
Flags passed to the compiler.
</pre></td></tr></table>However, the most important single thing that will help me is
the file that you were trying to compress or decompress at the
time the problem happened. Without that, my ability to do anything
more than speculate about the cause is limited.
</P><P>
Please remember that I connect to the Internet with a modem, so
you should contact me before mailing me huge files.
</P><P>
<HR SIZE="6">
<A NAME="SEC47"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC46"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC48"> &gt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H2> 4.4 Did you get the right package? </H2>
<!--docid::SEC47::-->
<P>
<CODE>bzip2</CODE> is a resource hog. It soaks up large amounts of CPU cycles
and memory. Also, it gives very large latencies. In the worst case, you
can feed many megabytes of uncompressed data into the library before
getting any compressed output, so this probably rules out applications
requiring interactive behaviour.
</P><P>
These aren't faults of my implementation, I hope, but more
an intrinsic property of the Burrows-Wheeler transform (unfortunately).
Maybe this isn't what you want.
</P><P>
If you want a compressor and/or library which is faster, uses less
memory but gets pretty good compression, and has minimal latency,
consider Jean-loup
Gailly's and Mark Adler's work, <CODE>zlib-1.1.3</CODE> and
<CODE>gzip-1.2.4</CODE>. Look for them at
</P><P>
<CODE>http://www.zlib.org</CODE> and
<CODE>http://www.gzip.org</CODE> respectively.
</P><P>
For something faster and lighter still, you might try Markus F X J
Oberhumer's <CODE>LZO</CODE> real-time compression/decompression library, at
<BR> <CODE>http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html</CODE>.
</P><P>
If you want to use the <CODE>bzip2</CODE> algorithms to compress small blocks
of data, 64k bytes or smaller, for example on an on-the-fly disk
compressor, you'd be well advised not to use this library. Instead,
I've made a special library tuned for that kind of use. It's part of
<CODE>e2compr-0.40</CODE>, an on-the-fly disk compressor for the Linux
<CODE>ext2</CODE> filesystem. Look at
<CODE>http://www.netspace.net.au/~reiter/e2compr</CODE>.
</P><P>
<HR SIZE="6">
<A NAME="SEC48"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC47"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC49"> &gt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H2> 4.5 Testing </H2>
<!--docid::SEC48::-->
<P>
A record of the tests I've done.
</P><P>
First, some data sets:
<UL>
<LI>B: a directory containing 6001 files, one for every length in the
range 0 to 6000 bytes. The files contain random lowercase
letters. 18.7 megabytes.
<LI>H: my home directory tree. Documents, source code, mail files,
compressed data. H contains B, and also a directory of
files designed as boundary cases for the sorting; mostly very
repetitive, nasty files. 565 megabytes.
<LI>A: directory tree holding various applications built from source:
<CODE>egcs</CODE>, <CODE>gcc-2.8.1</CODE>, KDE, GTK, Octave, etc.
2200 megabytes.
</UL>
The tests conducted are as follows. Each test means compressing
(a copy of) each file in the data set, decompressing it and
comparing it against the original.
<P>
First, a bunch of tests with block sizes and internal buffer
sizes set very small,
to detect any problems with the
blocking and buffering mechanisms.
This required modifying the source code so as to try to
break it.
<OL>
<LI>Data set H, with
buffer size of 1 byte, and block size of 23 bytes.
<LI>Data set B, buffer sizes 1 byte, block size 1 byte.
<LI>As (2) but small-mode decompression.
<LI>As (2) with block size 2 bytes.
<LI>As (2) with block size 3 bytes.
<LI>As (2) with block size 4 bytes.
<LI>As (2) with block size 5 bytes.
<LI>As (2) with block size 6 bytes and small-mode decompression.
<LI>H with buffer size of 1 byte, but normal block
size (up to 900000 bytes).
</OL>
Then some tests with unmodified source code.
<OL>
<LI>H, all settings normal.
<LI>As (1), with small-mode decompress.
<LI>H, compress with flag <CODE>-1</CODE>.
<LI>H, compress with flag <CODE>-s</CODE>, decompress with flag <CODE>-s</CODE>.
<LI>Forwards compatibility: H, <CODE>bzip2-0.1pl2</CODE> compressing,
<CODE>bzip2-0.9.5</CODE> decompressing, all settings normal.
<LI>Backwards compatibility: H, <CODE>bzip2-0.9.5</CODE> compressing,
<CODE>bzip2-0.1pl2</CODE> decompressing, all settings normal.
<LI>Bigger tests: A, all settings normal.
<LI>As (7), using the fallback (Sadakane-like) sorting algorithm.
<LI>As (8), compress with flag <CODE>-1</CODE>, decompress with flag
<CODE>-s</CODE>.
<LI>H, using the fallback sorting algorithm.
<LI>Forwards compatibility: A, <CODE>bzip2-0.1pl2</CODE> compressing,
<CODE>bzip2-0.9.5</CODE> decompressing, all settings normal.
<LI>Backwards compatibility: A, <CODE>bzip2-0.9.5</CODE> compressing,
<CODE>bzip2-0.1pl2</CODE> decompressing, all settings normal.
<LI>Misc test: about 400 megabytes of <CODE>.tar</CODE> files with
<CODE>bzip2</CODE> compiled with Checker (a memory access error
detector, like Purify).
<LI>Misc tests to make sure it builds and runs ok on non-Linux/x86
platforms.
</OL>
These tests were conducted on a 225 MHz IDT WinChip machine, running
Linux 2.0.36. They represent nearly a week of continuous computation.
All tests completed successfully.
<P>
<HR SIZE="6">
<A NAME="SEC49"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC48"> &lt; </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H2> 4.6 Further reading </H2>
<!--docid::SEC49::-->
<CODE>bzip2</CODE> is not research work, in the sense that it doesn't present
any new ideas. Rather, it's an engineering exercise based on existing
ideas.
<P>
Four documents describe essentially all the ideas behind <CODE>bzip2</CODE>:
<TABLE><tr><td>&nbsp;</td><td class=example><pre>Michael Burrows and D. J. Wheeler:
"A block-sorting lossless data compression algorithm"
10th May 1994.
Digital SRC Research Report 124.
ftp://ftp.digital.com/pub/DEC/SRC/research-reports/SRC-124.ps.gz
If you have trouble finding it, try searching at the
New Zealand Digital Library, http://www.nzdl.org.
Daniel S. Hirschberg and Debra A. Lelewer
"Efficient Decoding of Prefix Codes"
Communications of the ACM, April 1990, Vol 33, Number 4.
You might be able to get an electronic copy of this
from the ACM Digital Library.
David J. Wheeler
Program bred3.c and accompanying document bred3.ps.
This contains the idea behind the multi-table Huffman
coding scheme.
ftp://ftp.cl.cam.ac.uk/users/djw3/
Jon L. Bentley and Robert Sedgewick
"Fast Algorithms for Sorting and Searching Strings"
Available from Sedgewick's web page,
www.cs.princeton.edu/~rs
</pre></td></tr></table>The following paper gives valuable additional insights into the
algorithm, but is not immediately the basis of any code
used in bzip2.
<TABLE><tr><td>&nbsp;</td><td class=example><pre>Peter Fenwick:
Block Sorting Text Compression
Proceedings of the 19th Australasian Computer Science Conference,
Melbourne, Australia. Jan 31 - Feb 2, 1996.
ftp://ftp.cs.auckland.ac.nz/pub/peter-f/ACSC96paper.ps
</pre></td></tr></table>Kunihiko Sadakane's sorting algorithm, mentioned above,
is available from:
<TABLE><tr><td>&nbsp;</td><td class=example><pre>http://naomi.is.s.u-tokyo.ac.jp/~sada/papers/Sada98b.ps.gz
</pre></td></tr></table>The Manber-Myers suffix array construction
algorithm is described in a paper
available from:
<TABLE><tr><td>&nbsp;</td><td class=example><pre>http://www.cs.arizona.edu/people/gene/PAPERS/suffix.ps
</pre></td></tr></table>Finally, the following paper documents some recent investigations
I made into the performance of sorting algorithms:
<TABLE><tr><td>&nbsp;</td><td class=example><pre>Julian Seward:
On the Performance of BWT Sorting Algorithms
Proceedings of the IEEE Data Compression Conference 2000
Snowbird, Utah. 28-30 March 2000.
</pre></td></tr></table></P><P>
<HR SIZE="6">
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT"> &nbsp; <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<BR>
<FONT SIZE="-1">
This document was generated
by <I>Julian Seward</I> on <I>January, 5 2002</I>
using <A HREF="http://www.mathematik.uni-kl.de/~obachman/Texi2html"><I>texi2html</I></A>
</FONT>
</BODY>
</HTML>

View File

@ -1,201 +0,0 @@
<HTML>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<!-- Created on January, 5 2002 by texi2html 1.64 -->
<!--
Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author)
Karl Berry <karl@freefriends.org>
Olaf Bachmann <obachman@mathematik.uni-kl.de>
and many others.
Maintained by: Olaf Bachmann <obachman@mathematik.uni-kl.de>
Send bugs and suggestions to <texi2html@mathematik.uni-kl.de>
-->
<HEAD>
<TITLE>Untitled Document: About this document</TITLE>
<META NAME="description" CONTENT="Untitled Document: About this document">
<META NAME="keywords" CONTENT="Untitled Document: About this document">
<META NAME="resource-type" CONTENT="document">
<META NAME="distribution" CONTENT="global">
<META NAME="Generator" CONTENT="texi2html 1.64">
</HEAD>
<BODY LANG="" BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#800080" ALINK="#FF0000">
<A NAME="SEC_About"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H1>About this document</H1>
This document was generated by <I>Julian Seward</I> on <I>January, 5 2002</I>
using <A HREF="http://www.mathematik.uni-kl.de/~obachman/Texi2html"><I>texi2html</I></A>
<P></P>
The buttons in the navigation panels have the following meaning:
<P></P>
<table border = "1">
<TR>
<TH> Button </TH>
<TH> Name </TH>
<TH> Go to </TH>
<TH> From 1.2.3 go to</TH>
</TR>
<TR>
<TD ALIGN="CENTER">
[ &lt; ] </TD>
<TD ALIGN="CENTER">
Back
</TD>
<TD>
previous section in reading order
</TD>
<TD>
1.2.2
</TD>
</TR>
<TR>
<TD ALIGN="CENTER">
[ &gt; ] </TD>
<TD ALIGN="CENTER">
Forward
</TD>
<TD>
next section in reading order
</TD>
<TD>
1.2.4
</TD>
</TR>
<TR>
<TD ALIGN="CENTER">
[ &lt;&lt; ] </TD>
<TD ALIGN="CENTER">
FastBack
</TD>
<TD>
previous or up-and-previous section
</TD>
<TD>
1.1
</TD>
</TR>
<TR>
<TD ALIGN="CENTER">
[ Up ] </TD>
<TD ALIGN="CENTER">
Up
</TD>
<TD>
up section
</TD>
<TD>
1.2
</TD>
</TR>
<TR>
<TD ALIGN="CENTER">
[ &gt;&gt; ] </TD>
<TD ALIGN="CENTER">
FastForward
</TD>
<TD>
next or up-and-next section
</TD>
<TD>
1.3
</TD>
</TR>
<TR>
<TD ALIGN="CENTER">
[Top] </TD>
<TD ALIGN="CENTER">
Top
</TD>
<TD>
cover (top) of document
</TD>
<TD>
&nbsp;
</TD>
</TR>
<TR>
<TD ALIGN="CENTER">
[Contents] </TD>
<TD ALIGN="CENTER">
Contents
</TD>
<TD>
table of contents
</TD>
<TD>
&nbsp;
</TD>
</TR>
<TR>
<TD ALIGN="CENTER">
[Index] </TD>
<TD ALIGN="CENTER">
Index
</TD>
<TD>
concept index
</TD>
<TD>
&nbsp;
</TD>
</TR>
<TR>
<TD ALIGN="CENTER">
[ ? ] </TD>
<TD ALIGN="CENTER">
About
</TD>
<TD>
this page
</TD>
<TD>
&nbsp;
</TD>
</TR>
</TABLE>
<P></P>
where the <STRONG> Example </STRONG> assumes that the current position
is at <STRONG> Subsubsection One-Two-Three </STRONG> of a document of
the following structure:
<UL>
<LI> 1. Section One </LI>
<UL>
<LI>1.1 Subsection One-One</LI>
<UL>
<LI> ... </LI>
</UL>
<LI>1.2 Subsection One-Two</LI>
<UL>
<LI>1.2.1 Subsubsection One-Two-One
</LI><LI>1.2.2 Subsubsection One-Two-Two
</LI><LI>1.2.3 Subsubsection One-Two-Three &nbsp; &nbsp; <STRONG>
&lt;== Current Position </STRONG>
</LI><LI>1.2.4 Subsubsection One-Two-Four
</LI></UL>
<LI>1.3 Subsection One-Three</LI>
<UL>
<LI> ... </LI>
</UL>
<LI>1.4 Subsection One-Four</LI>
</UL>
</UL>
<HR SIZE=1>
<BR>
<FONT SIZE="-1">
This document was generated
by <I>Julian Seward</I> on <I>January, 5 2002</I>
using <A HREF="http://www.mathematik.uni-kl.de/~obachman/Texi2html"><I>texi2html</I></A>
</FONT>
</BODY>
</HTML>

View File

@ -1,54 +0,0 @@
<HTML>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<!-- Created on January, 5 2002 by texi2html 1.64 -->
<!--
Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author)
Karl Berry <karl@freefriends.org>
Olaf Bachmann <obachman@mathematik.uni-kl.de>
and many others.
Maintained by: Olaf Bachmann <obachman@mathematik.uni-kl.de>
Send bugs and suggestions to <texi2html@mathematik.uni-kl.de>
-->
<HEAD>
<TITLE>Untitled Document: Short Table of Contents</TITLE>
<META NAME="description" CONTENT="Untitled Document: Short Table of Contents">
<META NAME="keywords" CONTENT="Untitled Document: Short Table of Contents">
<META NAME="resource-type" CONTENT="document">
<META NAME="distribution" CONTENT="global">
<META NAME="Generator" CONTENT="texi2html 1.64">
</HEAD>
<BODY LANG="" BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#800080" ALINK="#FF0000">
<A NAME="SEC_OVERVIEW"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H1>Short Table of Contents</H1>
<BLOCKQUOTE>
<A NAME="TOC1" HREF="manual_1.html#SEC1">1. Introduction</A>
<BR>
<A NAME="TOC2" HREF="manual_2.html#SEC2">2. How to use <CODE>bzip2</CODE></A>
<BR>
<A NAME="TOC12" HREF="manual_3.html#SEC12">3. Programming with <CODE>libbzip2</CODE></A>
<BR>
<A NAME="TOC43" HREF="manual_4.html#SEC43">4. Miscellanea</A>
<BR>
</BLOCKQUOTE>
<HR SIZE=1>
<BR>
<FONT SIZE="-1">
This document was generated
by <I>Julian Seward</I> on <I>January, 5 2002</I>
using <A HREF="http://www.mathematik.uni-kl.de/~obachman/Texi2html"><I>texi2html</I></A>
</FONT>
</BODY>
</HTML>

View File

@ -1,163 +0,0 @@
<HTML>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<!-- Created on January, 5 2002 by texi2html 1.64 -->
<!--
Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author)
Karl Berry <karl@freefriends.org>
Olaf Bachmann <obachman@mathematik.uni-kl.de>
and many others.
Maintained by: Olaf Bachmann <obachman@mathematik.uni-kl.de>
Send bugs and suggestions to <texi2html@mathematik.uni-kl.de>
-->
<HEAD>
<TITLE>Untitled Document: Table of Contents</TITLE>
<META NAME="description" CONTENT="Untitled Document: Table of Contents">
<META NAME="keywords" CONTENT="Untitled Document: Table of Contents">
<META NAME="resource-type" CONTENT="document">
<META NAME="distribution" CONTENT="global">
<META NAME="Generator" CONTENT="texi2html 1.64">
</HEAD>
<BODY LANG="" BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#800080" ALINK="#FF0000">
<A NAME="SEC_Contents"></A>
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
</TR></TABLE>
<H1>Table of Contents</H1>
<UL>
<A NAME="TOC1" HREF="manual_1.html#SEC1">1. Introduction</A>
<BR>
<A NAME="TOC2" HREF="manual_2.html#SEC2">2. How to use <CODE>bzip2</CODE></A>
<BR>
<UL>
<UL>
<UL>
<A NAME="TOC3" HREF="manual_2.html#SEC3">NAME</A>
<BR>
<A NAME="TOC4" HREF="manual_2.html#SEC4">SYNOPSIS</A>
<BR>
<A NAME="TOC5" HREF="manual_2.html#SEC5">DESCRIPTION</A>
<BR>
<A NAME="TOC6" HREF="manual_2.html#SEC6">OPTIONS</A>
<BR>
<A NAME="TOC7" HREF="manual_2.html#SEC7">MEMORY MANAGEMENT</A>
<BR>
<A NAME="TOC8" HREF="manual_2.html#SEC8">RECOVERING DATA FROM DAMAGED FILES</A>
<BR>
<A NAME="TOC9" HREF="manual_2.html#SEC9">PERFORMANCE NOTES</A>
<BR>
<A NAME="TOC10" HREF="manual_2.html#SEC10">CAVEATS</A>
<BR>
<A NAME="TOC11" HREF="manual_2.html#SEC11">AUTHOR</A>
<BR>
</UL>
</UL>
</UL>
<A NAME="TOC12" HREF="manual_3.html#SEC12">3. Programming with <CODE>libbzip2</CODE></A>
<BR>
<UL>
<A NAME="TOC13" HREF="manual_3.html#SEC13">3.1 Top-level structure</A>
<BR>
<UL>
<A NAME="TOC14" HREF="manual_3.html#SEC14">3.1.1 Low-level summary</A>
<BR>
<A NAME="TOC15" HREF="manual_3.html#SEC15">3.1.2 High-level summary</A>
<BR>
<A NAME="TOC16" HREF="manual_3.html#SEC16">3.1.3 Utility functions summary</A>
<BR>
</UL>
<A NAME="TOC17" HREF="manual_3.html#SEC17">3.2 Error handling</A>
<BR>
<A NAME="TOC18" HREF="manual_3.html#SEC18">3.3 Low-level interface</A>
<BR>
<UL>
<A NAME="TOC19" HREF="manual_3.html#SEC19">3.3.1 <CODE>BZ2_bzCompressInit</CODE></A>
<BR>
<A NAME="TOC20" HREF="manual_3.html#SEC20">3.3.2 <CODE>BZ2_bzCompress</CODE></A>
<BR>
<A NAME="TOC21" HREF="manual_3.html#SEC21">3.3.3 <CODE>BZ2_bzCompressEnd</CODE></A>
<BR>
<A NAME="TOC22" HREF="manual_3.html#SEC22">3.3.4 <CODE>BZ2_bzDecompressInit</CODE></A>
<BR>
<A NAME="TOC23" HREF="manual_3.html#SEC23">3.3.5 <CODE>BZ2_bzDecompress</CODE></A>
<BR>
<A NAME="TOC24" HREF="manual_3.html#SEC24">3.3.6 <CODE>BZ2_bzDecompressEnd</CODE></A>
<BR>
</UL>
<A NAME="TOC25" HREF="manual_3.html#SEC25">3.4 High-level interface</A>
<BR>
<UL>
<A NAME="TOC26" HREF="manual_3.html#SEC26">3.4.1 <CODE>BZ2_bzReadOpen</CODE></A>
<BR>
<A NAME="TOC27" HREF="manual_3.html#SEC27">3.4.2 <CODE>BZ2_bzRead</CODE></A>
<BR>
<A NAME="TOC28" HREF="manual_3.html#SEC28">3.4.3 <CODE>BZ2_bzReadGetUnused</CODE></A>
<BR>
<A NAME="TOC29" HREF="manual_3.html#SEC29">3.4.4 <CODE>BZ2_bzReadClose</CODE></A>
<BR>
<A NAME="TOC30" HREF="manual_3.html#SEC30">3.4.5 <CODE>BZ2_bzWriteOpen</CODE></A>
<BR>
<A NAME="TOC31" HREF="manual_3.html#SEC31">3.4.6 <CODE>BZ2_bzWrite</CODE></A>
<BR>
<A NAME="TOC32" HREF="manual_3.html#SEC32">3.4.7 <CODE>BZ2_bzWriteClose</CODE></A>
<BR>
<A NAME="TOC33" HREF="manual_3.html#SEC33">3.4.8 Handling embedded compressed data streams</A>
<BR>
<A NAME="TOC34" HREF="manual_3.html#SEC34">3.4.9 Standard file-reading/writing code</A>
<BR>
</UL>
<A NAME="TOC35" HREF="manual_3.html#SEC35">3.5 Utility functions</A>
<BR>
<UL>
<A NAME="TOC36" HREF="manual_3.html#SEC36">3.5.1 <CODE>BZ2_bzBuffToBuffCompress</CODE></A>
<BR>
<A NAME="TOC37" HREF="manual_3.html#SEC37">3.5.2 <CODE>BZ2_bzBuffToBuffDecompress</CODE></A>
<BR>
</UL>
<A NAME="TOC38" HREF="manual_3.html#SEC38">3.6 <CODE>zlib</CODE> compatibility functions</A>
<BR>
<A NAME="TOC39" HREF="manual_3.html#SEC39">3.7 Using the library in a <CODE>stdio</CODE>-free environment</A>
<BR>
<UL>
<A NAME="TOC40" HREF="manual_3.html#SEC40">3.7.1 Getting rid of <CODE>stdio</CODE></A>
<BR>
<A NAME="TOC41" HREF="manual_3.html#SEC41">3.7.2 Critical error handling</A>
<BR>
</UL>
<A NAME="TOC42" HREF="manual_3.html#SEC42">3.8 Making a Windows DLL</A>
<BR>
</UL>
<A NAME="TOC43" HREF="manual_4.html#SEC43">4. Miscellanea</A>
<BR>
<UL>
<A NAME="TOC44" HREF="manual_4.html#SEC44">4.1 Limitations of the compressed file format</A>
<BR>
<A NAME="TOC45" HREF="manual_4.html#SEC45">4.2 Portability issues</A>
<BR>
<A NAME="TOC46" HREF="manual_4.html#SEC46">4.3 Reporting bugs</A>
<BR>
<A NAME="TOC47" HREF="manual_4.html#SEC47">4.4 Did you get the right package?</A>
<BR>
<A NAME="TOC48" HREF="manual_4.html#SEC48">4.5 Testing</A>
<BR>
<A NAME="TOC49" HREF="manual_4.html#SEC49">4.6 Further reading</A>
<BR>
</UL>
</UL>
<HR SIZE=1>
<BR>
<FONT SIZE="-1">
This document was generated
by <I>Julian Seward</I> on <I>January, 5 2002</I>
using <A HREF="http://www.mathematik.uni-kl.de/~obachman/Texi2html"><I>texi2html</I></A>
</FONT>
</BODY>
</HTML>

16
mk251.c
View File

@ -1,16 +0,0 @@
/* Spew out a long sequence of the byte 251. When fed to bzip2
versions 1.0.0 or 1.0.1, causes it to die with internal error
1007 in blocksort.c. This assertion misses an extremely rare
case, which is fixed in this version (1.0.2) and above.
*/
#include <stdio.h>
/* Write 48,500,000 copies of the byte value 251 to stdout.
   Returns 0 on success, or 1 if a write to stdout fails. */
int main ( void )
{
   /* The count does not fit in the minimum range the C standard
      guarantees for int (32767), so use long: with a 16-bit int the
      loop condition could never become false. */
   long i;

   for (i = 0; i < 48500000L; i++) {
      /* Stop early instead of spinning if stdout can no longer be
         written (closed pipe, full disk, ...). */
      if (putchar(251) == EOF)
         return 1;
   }
   return 0;
}

View File

@ -63,7 +63,11 @@
/*---------------------------------------------*/
Int32 BZ2_rNums[512] = {
#ifdef __ORCAC__
Int16 BZ2_rNums[512] = {
#else
Int32 BZ2_rNums[512] = {
#endif
619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
985, 724, 205, 454, 863, 491, 741, 242, 949, 214,
733, 859, 335, 708, 621, 574, 73, 654, 730, 472,

39
spewG.c
View File

@ -1,39 +0,0 @@
/* spew out a thoroughly gigantic file designed so that bzip2
can compress it reasonably rapidly. This is to help test
support for large files (> 2GB) in a reasonable amount of time.
I suggest you use the undocumented --exponential option to
bzip2 when compressing the resulting file; this saves a bit of
time. Note: *don't* bother with --exponential when compressing
Real Files; it'll just waste a lot of CPU time :-)
(but is otherwise harmless).
*/
#define _FILE_OFFSET_BITS 64
#include <stdio.h>
#include <stdlib.h>
/* The number of megabytes of junk to spew out (roughly) */
#define MEGABYTES 5000
#define N_BUF 1000000
char buf[N_BUF];
/* Write (roughly) MEGABYTES megabytes of repetitive text to stdout.
   The generator is seeded with the constant 1 and stdout is buffered
   through the file-level array buf.  Each round draws a run length and
   prints that many blocks of 'a', one fewer blocks of 'b' and one more
   block of 'c'.  The command-line arguments are not used. */
int main ( int argc, char** argv )
{
   int round, rep, run;

   srandom(1);
   setbuffer ( stdout, buf, N_BUF );

   round = 0;
   while (round < MEGABYTES * 515) {
      /* run length in the range 25..74 */
      run = 25+random()%50;

      rep = 0;
      while (rep < run) {
         printf ( "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" );
         rep++;
      }

      rep = 0;
      while (rep < run-1) {
         printf ( "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" );
         rep++;
      }

      rep = 0;
      while (rep < run+1) {
         printf ( "ccccccccccccccccccccccccccccccccccccc" );
         rep++;
      }

      round += 3;
   }

   fflush(stdout);
   return 0;
}

58
stristr.c Normal file
View File

@ -0,0 +1,58 @@
/* Case-insensitive version of strstr() obtained from http://snippets.org */
#ifdef __ORCAC__
segment "bzip2";
#endif
/*
** Designation: stristr
**
** Call syntax: char *stristr(char *String, char *Pattern)
**
** Description: This function is an ANSI version of strstr() with
** case insensitivity. (Functionally equivalent to
** the strcasestr function in some C libraries.)
**
** Return item: char *pointer if Pattern is found in String, else
** null pointer
**
** Rev History: 07/06/03 Stephen Heumann Used in bunzip2 for GNO
** 16/04/03 ? ?
** 16/07/97 Greg Thayer Optimized
** 07/04/95 Bob Stout ANSI-fy
** 02/03/94 Fred Cole Original
**
** Hereby donated to public domain.
*/
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/*
** Case-insensitive substring search.
**
** String   - NUL-terminated text to search in.
** Pattern  - NUL-terminated text to search for.
**
** Returns a pointer to the first position in String at which Pattern
** occurs when letter case is ignored, or NULL if it does not occur.
** An empty Pattern matches at the start of String, as with strstr().
*/
char *stristr(const char *String, const char *Pattern)
{
    const char *start;

    /* Without this guard the comparison loop below would step past the
       terminator of an empty pattern. */
    if ('\0' == *Pattern)
        return (char *)String;

    for (start = String; *start != '\0'; start++)
    {
        const char *sptr = start;
        const char *pptr = Pattern;

        /* toupper() requires a value representable as unsigned char (or
           EOF); a plain char may be negative, hence the casts.  The loop
           stops at the end of String because its terminator cannot match
           a pattern character (the end of the pattern returns below). */
        while (toupper((unsigned char)*sptr) == toupper((unsigned char)*pptr))
        {
            sptr++;
            pptr++;
            /* if end of pattern then pattern was found */
            if ('\0' == *pptr)
                return (char *)start;
        }
    }

    return NULL;
}

View File

@ -1,126 +0,0 @@
/* A test program written to test robustness to decompression of
corrupted data. Usage is
unzcrash filename
and the program will read the specified file, compress it (in memory),
and then repeatedly decompress it, each time with a different bit of
the compressed data inverted, so as to test all possible one-bit errors.
This should not cause any invalid memory accesses. If it does,
I want to know about it!
p.s. As you can see from the above description, the process is
incredibly slow. A file of size eg 5KB will cause it to run for
many hours.
*/
#include <stdio.h>
#include <assert.h>
#include "bzlib.h"
/* Maximum number of input bytes read from the file named on the
   command line. */
#define M_BLOCK 1000000
typedef unsigned char uchar;
/* Size of the decompression output buffer: M_BLOCK plus a further
   1000000 bytes. */
#define M_BLOCK_OUT (M_BLOCK + 1000000)
/* Bytes read from the input file. */
uchar inbuf[M_BLOCK];
/* Receives the result of each decompression attempt. */
uchar outbuf[M_BLOCK_OUT];
/* Compressed form of inbuf; sized as the input size plus 1% plus 600
   bytes.  flip_bit() inverts individual bits of this buffer. */
uchar zbuf[M_BLOCK + 600 + (M_BLOCK / 100)];
/* Byte counts for inbuf, outbuf and zbuf respectively. */
int nIn, nOut, nZ;
/* Printable names for the result of the decompression call in main(),
   indexed by the negated return code: main() prints bzerrorstrings[-r],
   so entry 0 is the name for a return value of 0, entry 1 for -1, and
   so on.  The trailing "???" entries are spare slots. */
static char *bzerrorstrings[] = {
"OK"
,"SEQUENCE_ERROR"
,"PARAM_ERROR"
,"MEM_ERROR"
,"DATA_ERROR"
,"DATA_ERROR_MAGIC"
,"IO_ERROR"
,"UNEXPECTED_EOF"
,"OUTBUFF_FULL"
,"???" /* for future */
,"???" /* for future */
,"???" /* for future */
,"???" /* for future */
,"???" /* for future */
,"???" /* for future */
};
/* Invert one bit of the compressed buffer zbuf.  Bits are numbered
   from the start of the buffer: bit / 8 selects the byte and bit % 8
   selects the bit within it, counting from the least significant bit.
   Calling this twice with the same argument restores the buffer. */
void flip_bit ( int bit )
{
   uchar mask = 1 << (bit % 8);

   zbuf[bit / 8] ^= mask;
}
/* Robustness test driver.  Usage: unzcrash filename

   Reads up to M_BLOCK bytes of the named file into inbuf, compresses
   them into zbuf, and then, for every bit of the compressed data in
   turn, inverts that bit, attempts a decompression into outbuf,
   reports the result on stderr, and restores the bit.

   Returns 0 when every bit has been tried; returns 1 on a usage error,
   if the file cannot be opened or read, or if a decompression that
   reports success yields data differing from the original input. */
int main ( int argc, char** argv )
{
   FILE* f;
   int r;
   int bit;
   int i;

   if (argc != 2) {
      fprintf ( stderr, "usage: unzcrash filename\n" );
      return 1;
   }

   /* Binary mode: the file is consumed as raw bytes, so no newline or
      end-of-file translation must be applied on platforms that
      distinguish text and binary streams. */
   f = fopen ( argv[1], "rb" );
   if (!f) {
      fprintf ( stderr, "unzcrash: can't open %s\n", argv[1] );
      return 1;
   }

   nIn = fread ( inbuf, 1, M_BLOCK, f );
   if (ferror ( f )) {
      fprintf ( stderr, "unzcrash: error reading %s\n", argv[1] );
      fclose ( f );
      return 1;
   }
   /* The file is not needed again; release it before the long loop. */
   fclose ( f );
   fprintf ( stderr, "%d bytes read\n", nIn );

   /* On entry nZ is the capacity of the destination buffer.  Use the
      real size of zbuf, which includes the extra headroom, rather than
      M_BLOCK, so that poorly compressible input close to M_BLOCK bytes
      still fits. */
   nZ = (int)sizeof(zbuf);
   r = BZ2_bzBuffToBuffCompress (
          zbuf, &nZ, inbuf, nIn, 9, 0, 30 );

   assert (r == BZ_OK);
   fprintf ( stderr, "%d after compression\n", nZ );

   for (bit = 0; bit < nZ*8; bit++) {
      fprintf ( stderr, "bit %d  ", bit );
      flip_bit ( bit );
      nOut = M_BLOCK_OUT;
      r = BZ2_bzBuffToBuffDecompress (
             outbuf, &nOut, zbuf, nZ, 0, 0 );
      fprintf ( stderr, " %d  %s ", r, bzerrorstrings[-r] );

      if (r != BZ_OK) {
         fprintf ( stderr, "\n" );
      } else {
         /* The corrupted stream decompressed without error, so the
            output has to be identical to the original input. */
         if (nOut != nIn) {
            fprintf(stderr, "nIn/nOut mismatch %d %d\n", nIn, nOut );
            return 1;
         }
         for (i = 0; i < nOut; i++) {
            if (inbuf[i] != outbuf[i]) {
               fprintf(stderr, "mismatch at %d\n", i );
               return 1;
            }
         }
         fprintf(stderr, "really ok!\n" );
      }

      /* Undo the damage before moving on to the next bit. */
      flip_bit ( bit );
   }

   fprintf ( stderr, "all ok\n" );
   return 0;
}

5
words0
View File

@ -1,5 +0,0 @@
If compilation produces errors, or a large number of warnings,
please read README.COMPILATION.PROBLEMS -- you might be able to
adjust the flags in this Makefile to improve matters.

4
words1
View File

@ -1,4 +1,4 @@
Doing 6 tests (3 compress, 3 uncompress) ...
Doing 3 decompression tests ...
If there's a problem, things might stop at this point.

6
words2
View File

@ -1,5 +1,3 @@
Checking test results. If any of the four "cmp"s which follow
report any differences, something is wrong. If you can't easily
figure out what, please let me know (jseward@acm.org).
Checking test results. If any of the three "cmp"s which
follow report any differences, something is wrong.

33
words3
View File

@ -1,23 +1,16 @@
If you got this far and the "cmp"s didn't complain, it looks
like you're in business.
like you're in business.
To install in /usr/bin, /usr/lib, /usr/man and /usr/include, type
make install
To install somewhere else, eg, /xxx/yyy/{bin,lib,man,include}, type
make install PREFIX=/xxx/yyy
If you are (justifiably) paranoid and want to see what 'make install'
is going to do, you can first do
make -n install or
make -n install PREFIX=/xxx/yyy respectively.
The -n instructs make to show the commands it would execute, but
not actually execute them.
Instructions for use are in the preformatted manual page, in the file
bzip2.txt. For more detailed documentation, read the full manual.
It is available in Postscript form (manual.ps), PDF form (manual.pdf),
and HTML form (manual_toc.html).
You can also do "bzip2 --help" to see some helpful information.
"bzip2 -L" displays the software license.
To install in /usr/local/bin and /usr/local/man, type
dmake justinstall
To install somewhere else, eg, /xxx/yyy/{bin,man}, type
dmake justinstall PREFIX=/xxx/yyy
If you are (justifiably) paranoid and want to see what
'dmake install' is going to do, type
dmake -n justinstall or
dmake -n justinstall PREFIX=/xxx/yyy respectively.
The -n instructs make to show the commands it would
execute, but not actually execute them.
You can do "bunzip2 --help" to see some helpful information.
"bunzip2 -L" displays the software license.

10
words4 Normal file
View File

@ -0,0 +1,10 @@
If all went well, bunzip2 (and bzip2recover) should
now be installed under your /usr/local hierarchy.
Put the following line in your gshrc file so you can use bzcat:
alias bzcat "bunzip2 -c"
Instructions for use are in the man page for bunzip2. Type
man bunzip2
to read it.