GNO port source code (from 2003)

This commit is contained in:
Stephen Heumann 2015-10-10 14:36:21 -05:00
parent 18bf65b3db
commit 160de28119
56 changed files with 1396 additions and 14474 deletions

220
Makefile
View File

@ -1,192 +1,94 @@
# Makefile for bunzip2 for GNO (for use with dmake)
# Based on Unix Makefile for bzip2
# Modified for GNO by Stephen Heumann
SHELL=/bin/sh
# ORCA/C 2.1.0 may need more than 8 megabytes of RAM to compile decompress.c
# with full optimization enabled. Thus, this makefile can only
# be used as is on an emulated system with 14 megabyte RAM support.
# To assist in cross-compiling
CC=gcc
AR=ar
RANLIB=ranlib
# Uncomment this if make doesn't have the $CC variable set appropriately
# CC=occ
RM=cp -p rm
LDFLAGS=
# Suitably paranoid flags to avoid bugs in gcc-2.7
BIGFILES=-D_FILE_OFFSET_BITS=64
CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES)
# The "-I /usr/include" shouldn't be needed but seemed to fix problems for me
CFLAGS=-a0 -w -O -I /usr/include
NOROOTFLAG=-r
# Where you want it installed when you do 'make install'
PREFIX=/usr
PREFIX=/usr/local
OBJS= blocksort.o \
OBJS= stristr.o \
huffman.o \
crctable.o \
randtable.o \
compress.o \
decompress.o \
bzlib.o
all: libbz2.a bzip2 bzip2recover test
all: bunzip2 bzip2recover test
bzip2: libbz2.a bzip2.o
$(CC) $(CFLAGS) $(LDFLAGS) -o bzip2 bzip2.o -L. -lbz2
bunzip2: bzip2.o $(OBJS)
$(CC) -o bunzip2 bunzip2.rez
$(CC) $(CFLAGS) $(LDFLAGS) bzip2.o $(OBJS) -o bunzip2
bzip2recover: bzip2recover.o
$(CC) $(CFLAGS) $(LDFLAGS) -o bzip2recover bzip2recover.o
libbz2.a: $(OBJS)
rm -f libbz2.a
$(AR) cq libbz2.a $(OBJS)
@if ( test -f $(RANLIB) -o -f /usr/bin/ranlib -o \
-f /bin/ranlib -o -f /usr/ccs/bin/ranlib ) ; then \
echo $(RANLIB) libbz2.a ; \
$(RANLIB) libbz2.a ; \
fi
$(CC) -o bzip2recover bzip2recover.rez
$(CC) $(CFLAGS) $(LDFLAGS) bzip2recover.o -o bzip2recover
check: test
test: bzip2
test: bunzip2
@cat words1
./bzip2 -1 < sample1.ref > sample1.rb2
./bzip2 -2 < sample2.ref > sample2.rb2
./bzip2 -3 < sample3.ref > sample3.rb2
./bzip2 -d < sample1.bz2 > sample1.tst
./bzip2 -d < sample2.bz2 > sample2.tst
./bzip2 -ds < sample3.bz2 > sample3.tst
cmp sample1.bz2 sample1.rb2
cmp sample2.bz2 sample2.rb2
cmp sample3.bz2 sample3.rb2
./bunzip2 -dk < sample1.bz2 > sample1.tst
./bunzip2 -dk < sample2.bz2 > sample2.tst
./bunzip2 -dks < sample3.bz2 > sample3.tst
@cat words2
cmp sample1.tst sample1.ref
cmp sample2.tst sample2.ref
cmp sample3.tst sample3.ref
@cat words3
install: bzip2 bzip2recover
if ( test ! -d $(PREFIX)/bin ) ; then mkdir -p $(PREFIX)/bin ; fi
if ( test ! -d $(PREFIX)/lib ) ; then mkdir -p $(PREFIX)/lib ; fi
if ( test ! -d $(PREFIX)/man ) ; then mkdir -p $(PREFIX)/man ; fi
if ( test ! -d $(PREFIX)/man/man1 ) ; then mkdir -p $(PREFIX)/man/man1 ; fi
if ( test ! -d $(PREFIX)/include ) ; then mkdir -p $(PREFIX)/include ; fi
cp -f bzip2 $(PREFIX)/bin/bzip2
cp -f bzip2 $(PREFIX)/bin/bunzip2
cp -f bzip2 $(PREFIX)/bin/bzcat
install: bunzip2 bzip2recover test justinstall
justinstall:
# This should install bunzip2 for GNO under /usr/local
mkdir $(PREFIX)/bin >& .null
mkdir $(PREFIX)/man >& .null
mkdir $(PREFIX)/man/man1 >& .null
cp -f bunzip2 $(PREFIX)/bin/bunzip2
cp -f bzip2recover $(PREFIX)/bin/bzip2recover
chmod a+x $(PREFIX)/bin/bzip2
chmod a+x $(PREFIX)/bin/bunzip2
chmod a+x $(PREFIX)/bin/bzcat
chmod a+x $(PREFIX)/bin/bzip2recover
cp -f bzip2.1 $(PREFIX)/man/man1
chmod a+r $(PREFIX)/man/man1/bzip2.1
cp -f bzlib.h $(PREFIX)/include
chmod a+r $(PREFIX)/include/bzlib.h
cp -f libbz2.a $(PREFIX)/lib
chmod a+r $(PREFIX)/lib/libbz2.a
cp -f bzgrep $(PREFIX)/bin/bzgrep
ln $(PREFIX)/bin/bzgrep $(PREFIX)/bin/bzegrep
ln $(PREFIX)/bin/bzgrep $(PREFIX)/bin/bzfgrep
chmod a+x $(PREFIX)/bin/bzgrep
cp -f bzmore $(PREFIX)/bin/bzmore
ln $(PREFIX)/bin/bzmore $(PREFIX)/bin/bzless
chmod a+x $(PREFIX)/bin/bzmore
cp -f bzdiff $(PREFIX)/bin/bzdiff
ln $(PREFIX)/bin/bzdiff $(PREFIX)/bin/bzcmp
chmod a+x $(PREFIX)/bin/bzdiff
cp -f bzgrep.1 bzmore.1 bzdiff.1 $(PREFIX)/man/man1
chmod a+r $(PREFIX)/man/man1/bzgrep.1
chmod a+r $(PREFIX)/man/man1/bzmore.1
chmod a+r $(PREFIX)/man/man1/bzdiff.1
echo ".so man1/bzgrep.1" > $(PREFIX)/man/man1/bzegrep.1
echo ".so man1/bzgrep.1" > $(PREFIX)/man/man1/bzfgrep.1
echo ".so man1/bzmore.1" > $(PREFIX)/man/man1/bzless.1
echo ".so man1/bzdiff.1" > $(PREFIX)/man/man1/bzcmp.1
cp -f bunzip2.1 $(PREFIX)/man/man1/bunzip2.1
cp -f bzip2recover.1 $(PREFIX)/man/man1/bzip2recover.1
cp -f bzcat.1 $(PREFIX)/man/man1/bzcat.1
@cat words4
distclean: clean
clean:
rm -f *.o libbz2.a bzip2 bzip2recover \
sample1.rb2 sample2.rb2 sample3.rb2 \
clean:
$(RM) -f *.o *.a *.sym *.root bunzip2 bzip2recover \
sample1.tst sample2.tst sample3.tst
blocksort.o: blocksort.c
@cat words0
$(CC) $(CFLAGS) -c blocksort.c
huffman.o: huffman.c
$(CC) $(CFLAGS) -c huffman.c
crctable.o: crctable.c
$(CC) $(CFLAGS) -c crctable.c
randtable.o: randtable.c
$(CC) $(CFLAGS) -c randtable.c
compress.o: compress.c
$(CC) $(CFLAGS) -c compress.c
decompress.o: decompress.c
$(CC) $(CFLAGS) -c decompress.c
bzlib.o: bzlib.c
$(CC) $(CFLAGS) -c bzlib.c
bzip2.o: bzip2.c
$(CC) $(CFLAGS) -c bzip2.c
stristr.o: stristr.c
$(CC) $(CFLAGS) $(NOROOTFLAG) -c stristr.c
huffman.o: huffman.c bzlib_private.h
$(CC) $(CFLAGS) $(NOROOTFLAG) -c huffman.c
crctable.o: crctable.c bzlib_private.h
$(CC) $(CFLAGS) $(NOROOTFLAG) -c crctable.c
randtable.o: randtable.c bzlib_private.h
$(CC) $(CFLAGS) $(NOROOTFLAG) -c randtable.c
decompress.o: decompress.c bzlib_private.h
$(CC) $(CFLAGS) $(NOROOTFLAG) -c decompress.c
bzlib.o: bzlib.c bzlib_private.h
$(CC) $(CFLAGS) $(NOROOTFLAG) -c bzlib.c
bzip2.o: bzip2.c bzlib.h
$(CC) $(CFLAGS) -s 2048 -C1 -c bzip2.c
# $(CC) $(CFLAGS) -C1 -D __STACK_CHECK__ -c bzip2.c
bzip2recover.o: bzip2recover.c
$(CC) $(CFLAGS) -c bzip2recover.c
$(CC) $(CFLAGS) -s 1024 -c bzip2recover.c
# $(CC) $(CFLAGS) -D __STACK_CHECK__ -c bzip2recover.c
bzlib_private.h: bzlib.h
DISTNAME=bzip2-1.0.2
tarfile:
rm -f $(DISTNAME)
ln -sf . $(DISTNAME)
tar cvf $(DISTNAME).tar \
$(DISTNAME)/blocksort.c \
$(DISTNAME)/huffman.c \
$(DISTNAME)/crctable.c \
$(DISTNAME)/randtable.c \
$(DISTNAME)/compress.c \
$(DISTNAME)/decompress.c \
$(DISTNAME)/bzlib.c \
$(DISTNAME)/bzip2.c \
$(DISTNAME)/bzip2recover.c \
$(DISTNAME)/bzlib.h \
$(DISTNAME)/bzlib_private.h \
$(DISTNAME)/Makefile \
$(DISTNAME)/manual.texi \
$(DISTNAME)/manual.ps \
$(DISTNAME)/manual.pdf \
$(DISTNAME)/LICENSE \
$(DISTNAME)/bzip2.1 \
$(DISTNAME)/bzip2.1.preformatted \
$(DISTNAME)/bzip2.txt \
$(DISTNAME)/words0 \
$(DISTNAME)/words1 \
$(DISTNAME)/words2 \
$(DISTNAME)/words3 \
$(DISTNAME)/sample1.ref \
$(DISTNAME)/sample2.ref \
$(DISTNAME)/sample3.ref \
$(DISTNAME)/sample1.bz2 \
$(DISTNAME)/sample2.bz2 \
$(DISTNAME)/sample3.bz2 \
$(DISTNAME)/dlltest.c \
$(DISTNAME)/*.html \
$(DISTNAME)/README \
$(DISTNAME)/README.COMPILATION.PROBLEMS \
$(DISTNAME)/CHANGES \
$(DISTNAME)/libbz2.def \
$(DISTNAME)/libbz2.dsp \
$(DISTNAME)/dlltest.dsp \
$(DISTNAME)/makefile.msc \
$(DISTNAME)/Y2K_INFO \
$(DISTNAME)/unzcrash.c \
$(DISTNAME)/spewG.c \
$(DISTNAME)/mk251.c \
$(DISTNAME)/bzdiff \
$(DISTNAME)/bzdiff.1 \
$(DISTNAME)/bzmore \
$(DISTNAME)/bzmore.1 \
$(DISTNAME)/bzgrep \
$(DISTNAME)/bzgrep.1 \
$(DISTNAME)/Makefile-libbz2_so
gzip -v $(DISTNAME).tar
# For rebuilding the manual from sources on my RedHat 7.2 box
manual: manual.ps manual.pdf manual.html
manual.ps: manual.texi
tex manual.texi
dvips -o manual.ps manual.dvi
manual.pdf: manual.ps
ps2pdf manual.ps
manual.html: manual.texi
texi2html -split_chapter manual.texi
chtyp:
chtyp -l cc *.c *.h

View File

@ -1,44 +0,0 @@
# This Makefile builds a shared version of the library,
# libbz2.so.1.0.2, with soname libbz2.so.1.0,
# at least on x86-Linux (RedHat 7.2),
# with gcc-2.96 20000731 (Red Hat Linux 7.1 2.96-98).
# Please see the README file for some
# important info about building the library like this.
# Run all recipes with /bin/sh.
SHELL=/bin/sh
# Compiler driver; also used for the final shared-library link below.
CC=gcc
# Define that requests 64-bit file offsets (large file support).
BIGFILES=-D_FILE_OFFSET_BITS=64
# Position-independent code flags plus warning/optimization flags and BIGFILES.
CFLAGS=-fpic -fPIC -Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES)
# Object files that are linked into the shared library.
OBJS= blocksort.o \
huffman.o \
crctable.o \
randtable.o \
compress.o \
decompress.o \
bzlib.o
# Default target: link $(OBJS) into libbz2.so.1.0.2 with soname libbz2.so.1.0,
# build the bzip2-shared executable from bzip2.c against that library, then
# recreate the libbz2.so.1.0 symlink so that it points at libbz2.so.1.0.2.
all: $(OBJS)
$(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.2 $(OBJS)
$(CC) $(CFLAGS) -o bzip2-shared bzip2.c libbz2.so.1.0.2
rm -f libbz2.so.1.0
ln -s libbz2.so.1.0.2 libbz2.so.1.0
# Remove the objects, bzip2.o, the shared library, its symlink and bzip2-shared.
clean:
rm -f $(OBJS) bzip2.o libbz2.so.1.0.2 libbz2.so.1.0 bzip2-shared
# One explicit compile rule per library object; each depends only on its own
# .c file (no header dependencies are declared here).
blocksort.o: blocksort.c
$(CC) $(CFLAGS) -c blocksort.c
huffman.o: huffman.c
$(CC) $(CFLAGS) -c huffman.c
crctable.o: crctable.c
$(CC) $(CFLAGS) -c crctable.c
randtable.o: randtable.c
$(CC) $(CFLAGS) -c randtable.c
compress.o: compress.c
$(CC) $(CFLAGS) -c compress.c
decompress.o: decompress.c
$(CC) $(CFLAGS) -c decompress.c
bzlib.o: bzlib.c
$(CC) $(CFLAGS) -c bzlib.c

View File

@ -1,130 +0,0 @@
bzip2-1.0 should compile without problems on the vast majority of
platforms. Using the supplied Makefile, I've built and tested it
myself for x86-linux, sparc-solaris, alpha-linux, x86-cygwin32 and
alpha-tru64unix. With makefile.msc, Visual C++ 6.0 and nmake, you can
build a native Win32 version too. Large file support seems to work
correctly on at least alpha-tru64unix and x86-cygwin32 (on Windows
2000).
When I say "large file" I mean a file of size 2,147,483,648 (2^31)
bytes or above. Many older OSs can't handle files above this size,
but many newer ones can. Large files are pretty huge -- most files
you'll encounter are not Large Files.
Earlier versions of bzip2 (0.1, 0.9.0, 0.9.5) compiled on a wide
variety of platforms without difficulty, and I hope this version will
continue in that tradition. However, in order to support large files,
I've had to include the define -D_FILE_OFFSET_BITS=64 in the Makefile.
This can cause problems.
The technique of adding -D_FILE_OFFSET_BITS=64 to get large file
support is, as far as I know, the Recommended Way to get correct large
file support. For more details, see the Large File Support
Specification, published by the Large File Summit, at
http://www.sas.com/standard/large.file/
As a general comment, if you get compilation errors which you think
are related to large file support, try removing the above define from
the Makefile, ie, delete the line
BIGFILES=-D_FILE_OFFSET_BITS=64
from the Makefile, and do 'make clean ; make'. This will give you a
version of bzip2 without large file support, which, for most
applications, is probably not a problem.
Alternatively, try some of the platform-specific hints listed below.
You can use the spewG.c program to generate huge files to test bzip2's
large file support, if you are feeling paranoid. Be aware though that
any compilation problems which affect bzip2 will also affect spewG.c,
alas.
Known problems as of 1.0pre8:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* HP/UX 10.20 and 11.00, using gcc (2.7.2.3 and 2.95.2): A large
number of warnings appear, including the following:
/usr/include/sys/resource.h: In function `getrlimit':
/usr/include/sys/resource.h:168:
warning: implicit declaration of function `__getrlimit64'
/usr/include/sys/resource.h: In function `setrlimit':
/usr/include/sys/resource.h:170:
warning: implicit declaration of function `__setrlimit64'
This would appear to be a problem with large file support, header
files and gcc. gcc may or may not give up at this point. If it
fails, you might be able to improve matters by adding
-D__STDC_EXT__=1
to the BIGFILES variable in the Makefile, ie, change its definition
to
BIGFILES=-D_FILE_OFFSET_BITS=64 -D__STDC_EXT__=1
Even if gcc does produce a binary which appears to work (ie passes
its self-tests), you might want to test it to see if it works properly
on large files.
* HP/UX 10.20 and 11.00, using HP's cc compiler.
No specific problems for this combination, except that you'll need to
specify the -Ae flag, and zap the gcc-specific stuff
-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce.
You should retain -D_FILE_OFFSET_BITS=64 in order to get large
file support -- which is reported to work ok for this HP/UX + cc
combination.
* SunOS 4.1.X.
Amazingly, there are still people out there using this venerable old
banger. I shouldn't be too rude -- I started life on SunOS, and
it was a pretty darn good OS, way back then. Anyway:
SunOS doesn't seem to have strerror(), so you'll have to use
perror(), perhaps by adding this (warning: UNTESTED CODE):
char* strerror ( int errnum )
{
if (errnum < 0 || errnum >= sys_nerr)
return "Unknown error";
else
return sys_errlist[errnum];
}
Or you could comment out the relevant calls to strerror; they're
not mission-critical. Or you could upgrade to Solaris. Ha ha ha!
(what?? you think I've got Bad Attitude?)
* Making a shared library on Solaris. (Not really a compilation
problem, but many people ask ...)
Firstly, if you have Solaris 8, either you have libbz2.so already
on your system, or you can install it from the Solaris CD.
Secondly, be aware that there are potential naming conflicts
between the .so file supplied with Solaris 8, and the .so file
which Makefile-libbz2_so will make. Makefile-libbz2_so creates
a .so which has the names which I intend to be "official" as
of version 1.0.0 and onwards. Unfortunately, the .so in
Solaris 8 appeared before I decided on the final names, so
the two libraries are incompatible. We have since communicated
and I hope that the problems will have been solved in the next
version of Solaris, whenever that might appear.
All that said: you might be able to get somewhere
by finding the line in Makefile-libbz2_so which says
$(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.2 $(OBJS)
and replacing with
$(CC) -G -shared -o libbz2.so.1.0.2 -h libbz2.so.1.0 $(OBJS)
If gcc objects to the combination -fpic -fPIC, get rid of
the second one, leaving just "-fpic".
That's the end of the currently known compilation problems.

158
README.GNO Normal file
View File

@ -0,0 +1,158 @@
README FOR BUNZIP2 FOR GNO
==========================
This is a port of the bunzip2 archive decompression program to the GNO
environment on the Apple IIgs. It is based on Julian Seward's original
bzip2 program, but it includes only the decompression (and testing)
functionality; compression is disabled. This archive also includes the
bzip2recover program, which may allow you to recover some data from a
partially corrupted bzip2 archive file. These programs correspond to
Julian Seward's bzip2 version 1.0.2.
REQUIREMENTS
============
Bunzip2 requires a ROM 01 or ROM 3 Apple IIgs (or an emulator thereof)
running IIgs System Software 6.0.1 and GNO 2.0.6 (or later).
Bunzip2 also needs considerable memory. It will not be able to decompress
most archives if you have less than 4 megabytes of RAM. On 4-5 MB
systems, you will likely have to specify the -s option to minimize memory
usage; on an 8MB (or 14MB) system, this will probably not be necessary,
unless you have a very large number of system extensions or other programs
running under GNO. See the manpage for more details on memory usage.
If bunzip2 gives you an out-of-memory error the first time you run it, try
again. The first attempt may have caused the system to reorganize memory
and purge unneeded data, freeing up enough space to run bunzip2 on the
second attempt.
Bunzip2 will also benefit from an accelerator, although one is obviously
not required. Even with an accelerator, it can be rather slow when
decompressing larger archives. Be prepared to wait a very long time
(several hours or even longer) for bunzip2 to finish decompressing large
bzip2 archives.
INSTALLATION
============
To install bunzip2, simply run "dmake justinstall". Alternatively, you can
install it manually: copy the bunzip2 and bzip2recover programs to your GNO
installation's /usr/local/bin directory, and copy the bunzip2.1, bzcat.1,
and bzip2recover.1 manpages to the /usr/local/man/man1 directory.
After installing bunzip2, you should read the manpage for directions on how
to use it. You can put the following line in your gshrc file so you can use
'bzcat' as documented in the manpage:
alias bzcat "bunzip2 -c"
NOTES ON THE SOURCE CODE
========================
[If you just want to use bunzip2, you do not need to read this section.]
Please note that a couple source files use non-ProDOS compatible filenames.
If you do not have an HFS or AppleShare partition available, these can
easily be changed to fit ProDOS conventions.
I had to make several changes to the bzip2 program when porting it to GNO.
The code is not very good-looking, but it does compile without warnings.
First, I disabled the compression functionality and set up the program to
decompress by default (and I renamed the binary to 'bunzip2' to reflect
this). The compression functionality is not very important on the GS, since
bzip2 is not a very good choice for compressing GS-specific data; ShrinkIt
will be much faster and preserves GS-specific file attributes. Even if you
want to create archives for use on UNIX-like systems, compress or gzip is
a better choice, and both are already available under GNO. For these
reasons, and because it reduced the amount of code that I had to modify, I
removed the compression functionality from bunzip2.
Other major changes to the code fell into several categories:
(1) Type sizes: Most of the code used defines for types such as Int32, making
it easy to adapt to the GS's 16-bit ints. The interface between the
bzip2 program and code designed to be compiled as 'libbzip2,' however,
assumes that int is 32 bits, so I had to modify it to use the appropriate
integer types on the GS. There were also silent assumptions in some
other areas that native ints are 32 bits, and I had to identify and
correct these. There were also variables specified as 'Int32' even
though 16 bits were sufficient to represent their possible range of
values; when I noticed these variables, I changed them appropriately.
(2) ORCA/C compiler limitations: ORCA/C in its 'small mode' (the only one
supported by the GNO libraries) places a 64k restriction on the size
of data structures that can be addressed as arrays. This is a problem
with bunzip2, which allocates and uses multi-megabyte data structures.
To work around this, I changed array-style references to these data
structures to use pointer arithmetic instead, working around the
limitation (e.g. I changed references to 'a[b]' to '*(a+b)'). I also
changed large local variables to be static or dynamically allocated
in order to avoid excessive stack usage.
(3) ORCA/C compiler bugs: In several cases ORCA/C 2.1.0 generated bad code
at the maximum optimization level. Most instances where reduced
optimization levels are used are necessary to work around bugs encountered
when using the disabled optimizations. Also, the size of the main
decompression function in decompress.c stresses ORCA/C. I modified
the GET_BITS macro to reduce the code size of the BZ2_decompress function
by making some of the code into a separate function. If this is not done
or if optimization is not enabled (increasing the compiled code size
as compared to when optimization is enabled), the compiler will crash,
give an error, or generate bad object code that gives linker errors.
(4) Modifications to work well with GNO and GS/OS: These include setting the
output filetype and disabling newline translation in GNO's stdio
implementation. I also set the stack sizes of the programs to
appropriate values and enabled stack checking for the small recursive
segment of the program (although it shouldn't actually pose any problem).
Additionally, I changed filename operations to be case-insensitive,
reflecting the case-insensitive nature of filesystems in the Apple IIgs.
I made most modifications conditional on the __appleiigs__, __ORCAC__, or
__GNO__ macros. Which macro I used gives some hint at the reason for each
modification, although all or none should be used to produce a working
executable (changes conditionalized on one macro may depend on those
conditionalized on another).
COMPILING
=========
The included Makefile can be used with dmake, occ, and ORCA/C 2.1.0, all of
which should be installed in your GNO 2.0.6 installation. You will also need
a copy of the lsaneglue library (which is missing from the default GNO 2.0.6
installation) to be present in your GNO /lib directory. Run 'dmake bunzip2'
to build the main program or 'dmake test' to build both programs and run a
simple test to ensure that bunzip2 is working correctly.
There are some special considerations necessary when compiling the file
decompress.c. As noted above, it must be compiled with (nearly) full
optimization to compile properly. To compile it with full optimization using
ORCA/C 2.1.0, however, requires more than 8MB of memory. Thus, decompress.c
(and by extension the bunzip2 program as a whole) can only be compiled on an
emulator with 14MB memory support enabled. The only emulators that presently
support this are Bernie ][ The Rescue and Sweet16. I have included a
prebuilt object file (decompress.o) so that you can rebuild bunzip2 with
changes to other source files using a real IIgs.
AREAS FOR IMPROVEMENT
=====================
* Resource forks and GS/OS filetypes are not supported. This is not a major
problem; other programs such as ShrinkIt should be used for GS-specific
archives.
* Compression could be reenabled. This would require adapting the compression
and block sorting routines to work properly under GNO on the GS.
* Some or all of the program could be rewritten in assembly language. This
would improve its performance by some amount, although I don't know how
much. It also might reduce memory usage. This would require a full
understanding of the BWT compression and decompression algorithms used in
bzip2, which I do not presently possess.
SUPPORT
=======
I can be contacted by email at sheumann@myrealbox.com . Please contact me,
rather than Julian Seward, about any problems that you are experiencing only
in the GNO version of bunzip2.
--
Stephen Heumann <sheumann@myrealbox.com>

View File

@ -1,34 +0,0 @@
Y2K status of bzip2 and libbzip2, versions 0.1, 0.9.0 and 0.9.5
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Informally speaking:
bzip2 is a compression program built on top of libbzip2,
a library which does the real work of compression and
decompression. As far as I am aware, libbzip2 does not have
any date-related code at all.
bzip2 itself copies dates from source to destination files
when compressing or decompressing, using the 'stat' and 'utime'
UNIX system calls. It doesn't examine, manipulate or store the
dates in any way. So as far as I can see, there shouldn't be any
problem with bzip2 providing 'stat' and 'utime' work correctly
on your system.
On non-unix platforms (those for which BZ_UNIX in bzip2.c is
not set to 1), bzip2 doesn't even do the date copying.
Overall, informally speaking, I don't think bzip2 or libbzip2
have a Y2K problem.
Formally speaking:
I am not prepared to offer you any assurance whatsoever
regarding Y2K issues in my software. You alone assume the
entire risk of using the software. The disclaimer of liability
in the LICENSE file in the bzip2 source distribution continues
to apply on this issue as with every other issue pertaining
to the software.
Julian Seward
Cambridge, UK
25 August 1999

File diff suppressed because it is too large Load Diff

362
bunzip2.1 Normal file
View File

@ -0,0 +1,362 @@
.TH BUNZIP2 1 "9 June 2003"
.SH NAME
bunzip2 \- a block-sorting file decompressor, v1.0.2gs1
.br
bzcat \- decompresses files to stdout
.br
bzip2recover \- recovers data from damaged bzip2 files
.SH SYNOPSIS
.br
.B bunzip2
.RB [ " \-fkvsVL " ]
[
.I "filenames \&..."
]
.br
.B bzcat
.RB [ " \-s " ]
[
.I "filenames \&..."
]
.br
.B bzip2recover
.I "filename"
.SH DESCRIPTION
.I bunzip2
decompresses files created by
.I bzip2
using the Burrows-Wheeler block sorting
text compression algorithm, and Huffman coding.
.I bzip2
generally achieves
considerably better compression than that achieved by more conventional
LZ77/LZ78-based compressors, and approaches the performance of the PPM
family of statistical compressors.
.LP
The command-line options are deliberately very similar to
those of
.I GNU
.I gunzip,
but they are not identical.
.LP
.I bunzip2
will by default not overwrite existing
files. If you want this to happen, specify the \-f flag.
.LP
.I bunzip2
decompresses all specified files. Files which were not created by
.I bzip2
will be detected and ignored, and a warning issued.
.I bunzip2
attempts to guess the filename for the decompressed file
from that of the compressed file as follows:
.LP
.nf
filename.bz2 becomes filename
filename.bz becomes filename
filename.tbz2 becomes filename.tar
filename.tbz becomes filename.tar
anyothername becomes anyothername.out
.fi
.LP
If the file does not end in one of the recognised endings,
.I .bz2,
.I .bz,
.I .tbz2
or
.I .tbz,
.I bunzip2
complains that it cannot
guess the name of the original file, and uses the original name
with
.I .out
appended.
.LP
Supplying no filenames causes decompression from
standard input to standard output.
.LP
File name handling is
naive in the sense that there is no mechanism for preserving original
file names, permissions, ownerships or dates in operating systems or
filesystems which lack these concepts, or have serious file name length
restrictions, such as MS-DOS or GS/OS.
.LP
.I bunzip2
will correctly decompress a file which is the
concatenation of two or more compressed files. The result is the
concatenation of the corresponding uncompressed files. Integrity
testing (\-t)
of concatenated
compressed files is also supported.
.LP
You can also decompress files to the standard output by
giving the \-c flag. Multiple files may be
decompressed like this. The resulting outputs are fed sequentially to stdout.
.LP
.I bzcat
(or
.I bunzip2
.I \-c)
decompresses all specified files to
the standard output.
.LP
.I bunzip2
will read arguments from the environment variables
.I BZIP2
and
.I BZIP,
in that order, and will process them
before any arguments read from the command line. This gives a
convenient way to supply default arguments.
.LP
As a self-check for your protection,
.I bzip2
and
.I bunzip2
use 32-bit CRCs to
make sure that the decompressed version of a file is identical to the
original. This guards against corruption of the compressed data, and
against undetected bugs in
.I bzip2
and
.I bunzip2
(hopefully very unlikely). The
chances of data corruption going undetected are microscopic, about one
chance in four billion for each file processed. Be aware, though, that
the check occurs upon decompression, so it can only tell you that
something is wrong. It can't help you
recover the original uncompressed
data. You can use
.I bzip2recover
to try to recover data from
damaged files.
.LP
This manual page pertains to version 1.0.2gs1 of
.I bunzip2.
It is fully compatible with compressed data created with all of the previous
public releases of bzip2, versions
0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, as well as version 1.0.2.
.LP
Return values: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt
compressed file, 3 for an internal consistency error (eg, bug) which
caused
.I bunzip2
to panic.
.LP
.SH OPTIONS
.IP "\fB\-c\fP \fB\--stdout\fP"
Decompress to standard output.
.IP "\fB\-d\fP \fB\--decompress\fP"
Force decompression. This flag is unnecessary on bunzip2 for GNO,
since it always decompresses.
.IP "\fB\-t\fP \fB\--test\fP"
Check integrity of the specified file(s), but don't decompress them.
This really performs a trial decompression and throws away the result.
.IP "\fB\-f\fP \fB\--force\fP"
Force overwrite of output files. Normally,
.I bunzip2
will not overwrite
existing output files.
.sp
.I bunzip2
normally declines to decompress files which don't have the
correct magic header bytes. If forced (-f), however, it will pass
such files through unmodified. This is how GNU gzip behaves.
.IP "\fB\-k\fP \fB\--keep\fP"
Keep (don't delete) input files during decompression.
.IP "\fB\-s\fP \fB\--small\fP"
Reduce memory usage, for decompression and testing. Files
are decompressed and tested using a modified algorithm which only
requires 2.5 bytes per block byte. This means any file can be
decompressed in 2300k of memory, albeit at about half the normal speed.
.sp
In short, if your machine is low on memory (5 megabytes or
less), you will probably need to use \-s. See MEMORY MANAGEMENT below.
.IP "\fB\-q\fP \fB\--quiet\fP"
Suppress non-essential warning messages. Messages pertaining to
I/O errors and other critical events will not be suppressed.
.IP "\fB\-v\fP \fB\--verbose\fP"
Verbose mode -- show the compression ratio for each file processed.
Further \-v's increase the verbosity level, spewing out lots of
information which is primarily of interest for diagnostic purposes.
.IP "\fB\-L\fP \fB\--license\fP \fB\-V\fP \fB\--version\fP"
Display the software version, license terms and conditions.
.IP "\fB\--\fP"
Treats all subsequent arguments as file names, even if they start
with a dash. This is so you can handle files with names beginning
with a dash, for example: bunzip2 \-- \-myfilename.
.LP
.SH MEMORY MANAGEMENT
.I bzip2
compresses large files in blocks. The block size affects
both the compression ratio achieved, and the amount of memory needed for
compression and decompression. The block size can be specified
to be 100,000 bytes through 900,000 bytes (the
default). At decompression time, the block size used for
compression is read from the header of the compressed file, and
.I bunzip2
then allocates itself just enough memory to decompress
the file.
.LP
Decompression requirements, in bytes, can be estimated as:
.LP
.nf
100k + ( 4 x block size ), or
100k + ( 2.5 x block size ) if using \-s
.fi
.LP
For files compressed with the default 900k block size,
.I bunzip2
will require about 3700 kbytes to decompress. To support decompression
of any file on a 4 megabyte machine,
.I bunzip2
has an option to
decompress using approximately half this amount of memory, about 2300
kbytes. Decompression speed is also halved, so you should use this
option only where necessary. The relevant flag is -s.
.LP
Decompression speeds are virtually unaffected by block size.
.LP
Another significant point applies to files which fit in a single block
-- that means most files you'd encounter using a large block size. The
amount of real memory touched is proportional to the size of the file,
since the file is smaller than a block. For example, compressing a file
20,000 bytes long with a 900k block size will cause the decompressor to
allocate 3700k but only touch 100k + 20000 * 4 = 180 kbytes
when decompressing it.
.LP
Here is a table which summarises the maximum memory usage for different
block sizes. Also recorded is the total compressed size for 14 files of
the Calgary Text Compression Corpus totalling 3,141,622 bytes. This
column gives some feel for how compression varies with block size.
These figures tend to understate the advantage of larger block sizes for
larger files, since the Corpus is dominated by smaller files.
.LP
.nf
Block Decompress Decompress Corpus
Size usage -s usage Size
.fi
.LP
.nf
100k 500k 350k 914704
200k 900k 600k 877703
300k 1300k 850k 860338
400k 1700k 1100k 846899
500k 2100k 1350k 845160
600k 2500k 1600k 838626
700k 2900k 1850k 834096
800k 3300k 2100k 828642
900k 3700k 2350k 828642
.fi
.LP
.SH RECOVERING DATA FROM DAMAGED FILES
.I bzip2
compresses files in blocks, usually 900kbytes long. Each
block is handled independently. If a media or transmission error causes
a multi-block .bz2
file to become damaged, it may be possible to
recover data from the undamaged blocks in the file.
.LP
The compressed representation of each block is delimited by a 48-bit
pattern, which makes it possible to find the block boundaries with
reasonable certainty. Each block also carries its own 32-bit CRC, so
damaged blocks can be distinguished from undamaged ones.
.LP
.I bzip2recover
is a simple program whose purpose is to search for blocks in .bz2 files,
and write each block out into its own .bz2 file. You can then use
.I bunzip2
\-t
to test the
integrity of the resulting files, and decompress those which are
undamaged.
.LP
.I bzip2recover
takes a single argument, the name of the damaged file,
and writes a number of files named "rec0001file.bz2",
"rec0002file.bz2", etc, containing the extracted blocks.
The output filenames are designed so that the use of
wildcards in subsequent processing -- for example,
"bunzip2 -c rec*file.bz2 > recovered_data" -- processes the files in
the correct order.
.LP
.I bzip2recover
should be of most use dealing with large .bz2
files, as these will contain many blocks. It is clearly
futile to use it on damaged single-block files, since a
damaged block cannot be recovered. If you wish to minimise
any potential data loss through media or transmission errors,
you might consider compressing with a smaller
block size.
.LP
.SH PERFORMANCE NOTES
.I bunzip2
usually allocates several megabytes of memory to operate
in, and then charges all over it in a fairly random fashion. This means
that performance is largely determined by the speed at which your machine can
access main memory or (if you have a caching accelerator) serve cache misses.
Because of this, small changes to the code to reduce the miss rate have
been observed to give disproportionately large performance improvements.
I imagine that
.I bunzip2
will perform best on machines with very large caches.
.LP
.SH CAVEATS
I/O error messages are not as helpful as they could be.
.I bunzip2
tries hard to detect I/O errors and exit cleanly, but the details of
what the problem is sometimes seem rather misleading.
.LP
.I bzip2recover
for GNO uses 32-bit integers to represent bit positions in compressed files,
so it cannot handle compressed files more than 512 megabytes long.
.LP
.SH AUTHOR
Julian Seward, jseward@acm.org.
.LP
http://sources.redhat.com/bzip2
.LP
The ideas embodied in
.I bzip2
are due to (at least) the following
people: Michael Burrows and David Wheeler (for the block sorting
transformation), David Wheeler (again, for the Huffman coder), Peter
Fenwick (for the structured coding model in the original
.I bzip,
and many refinements), and Alistair Moffat, Radford Neal and Ian Witten
(for the arithmetic coder in the original
.I bzip).
I am much
indebted for their help, support and advice. See the manual in the
source distribution for pointers to sources of documentation. Christian
von Roques encouraged me to look for faster sorting algorithms, so as to
speed up compression. Bela Lubkin encouraged me to improve the
worst-case compression performance. Many people sent patches, helped
with portability problems, lent machines, gave advice and were generally
helpful.
.LP
This version of
.I bunzip2
for GNO has been ported by Stephen Heumann <sheumann@myrealbox.com> from
Julian Seward's
.I bzip2
version 1.0.2 for other platforms.
.LP
This program contains material from the ORCA/C Run-Time Libraries,
copyright 1987-1996 by Byte Works, Inc. Used with permission.
.LP
It also incorporates a public domain stristr routine by Fred Cole,
Bob Stout, and Greg Thayer, which was obtained from http://www.snippets.org .

13
bunzip2.desc Normal file
View File

@ -0,0 +1,13 @@
Name: bunzip2
Version: 1.0.2
Shell: GNO/ME
Author: Stephen Heumann (GNO port of original code by Julian Seward)
Contact: sheumann@myrealbox.com
Where: /usr/local/bin
FTP: ftp.gno.org
Decompression program for files compressed in the bzip2 format. Based
on Julian Seward's bzip2 program, but only supports file decompression and
testing, not compression. Can also be used as bzcat, writing decompressed
data to stdout. Also includes bzip2recover program for restoring data
from partially corrupted bzip2 archives.

15
bunzip2.rez Normal file
View File

@ -0,0 +1,15 @@
#include "/lang/orca/libraries/rinclude/Types.Rez"
/*
 * Version resource (rVersion, ID 0x1) for the bunzip2 program:
 * version 1.0.2, release stage, US country code, plus the program
 * name and a one-line description string.
 */
resource rVersion (0x1, purgeable3, nocrossbank) {
{ 1, 0, 2, /* version 1.0.2 */
release, /* release stage: development|alpha|beta|final|release */
0 /* non-final release number */
},
verUS, /* country code -- only some are available */
"bunzip2", /* program name */
/* _Very_ brief description. Check the "file info" */
/* shown in the Finder to see if it's too long. */
/* Note that \n is used to separate lines here. */
"Bzip2 archive decompression program\n"
};

1
bzcat.1 Normal file
View File

@ -0,0 +1 @@
.so man1/bunzip2.1

76
bzdiff
View File

@ -1,76 +0,0 @@
#!/bin/sh
# sh is buggy on RS/6000 AIX 3.2. Replace above line with #!/bin/ksh

# Bzcmp/diff wrapped for bzip2,
# adapted from zdiff by Philippe Troin <phil@fifi.org> for Debian GNU/Linux.

# Bzcmp and bzdiff are used to invoke the cmp or the diff pro-
# gram on compressed files.  All options specified are passed
# directly to cmp or diff.  If only 1 file is specified, then
# the files compared are file1 and an uncompressed file1.bz2.
# If two files are specified, then they are uncompressed (if
# necessary) and fed to cmp or diff.  The exit status from cmp
# or diff is preserved.

PATH="/usr/bin:$PATH"; export PATH

# The comparison program is chosen from the name this script was invoked
# under: anything ending in "cmp" runs cmp, everything else runs diff.
# The CMP / DIFF environment variables override the program used.
prog=`echo $0 | sed 's|.*/||'`
case "$prog" in
    *cmp) comp=${CMP-cmp}   ;;
    *)    comp=${DIFF-diff} ;;
esac

# Split the command line into options (anything starting with "-") and
# file operands.  Every file operand must be an existing regular file.
OPTIONS=
FILES=
for ARG
do
    case "$ARG" in
        -*) OPTIONS="$OPTIONS $ARG";;
        *)  if test -f "$ARG"; then
                FILES="$FILES $ARG"
            else
                echo "${prog}: $ARG not found or not a regular file"
                exit 1
            fi ;;
    esac
done
if test -z "$FILES"; then
    echo "Usage: $prog [${comp}_options] file [file]"
    exit 1
fi

# File names were accumulated in a whitespace-separated string and are
# re-split here, so names containing whitespace are not supported.
set $FILES
if test $# -eq 1; then
    # One operand: compare file.bz2 (decompressed on the fly) with file.
    # The dot is escaped so that only a literal ".bz2" suffix is stripped.
    FILE=`echo "$1" | sed 's/\.bz2$//'`
    bzip2 -cd "$FILE.bz2" | $comp $OPTIONS - "$FILE"
    STAT="$?"
elif test $# -eq 2; then
    case "$1" in
        *.bz2)
            case "$2" in
                *.bz2)
                    # Both operands are compressed.  Only one of them can
                    # be fed through stdin, so the second is decompressed
                    # into a temporary file.  The file is created only on
                    # this path (the only one that needs it) and is removed
                    # on any exit, including interruption by a signal.
                    tmp=`tempfile -d /tmp -p bz` || {
                        echo 'cannot create a temporary file' >&2
                        exit 1
                    }
                    trap '/bin/rm -f "$tmp"' 0
                    trap 'exit 1' 1 2 15
                    bzip2 -cdfq "$2" > "$tmp"
                    bzip2 -cdfq "$1" | $comp $OPTIONS - "$tmp"
                    STAT="$?"
                    /bin/rm -f "$tmp";;

                *)  bzip2 -cdfq "$1" | $comp $OPTIONS - "$2"
                    STAT="$?";;
            esac;;
        *)  case "$2" in
                *.bz2)
                    bzip2 -cdfq "$2" | $comp $OPTIONS "$1" -
                    STAT="$?";;
                *)  $comp $OPTIONS "$1" "$2"
                    STAT="$?";;
            esac;;
    esac
    # Propagate the exit status of cmp/diff (the last pipeline stage).
    exit "$STAT"
else
    echo "Usage: $prog [${comp}_options] file [file]"
    exit 1
fi

View File

@ -1,47 +0,0 @@
.\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org>
.\"for Debian GNU/Linux
.TH BZDIFF 1
.SH NAME
bzcmp, bzdiff \- compare bzip2 compressed files
.SH SYNOPSIS
.B bzcmp
[ cmp_options ] file1
[ file2 ]
.br
.B bzdiff
[ diff_options ] file1
[ file2 ]
.SH DESCRIPTION
.I Bzcmp
and
.I bzdiff
are used to invoke the
.I cmp
or the
.I diff
program on bzip2 compressed files. All options specified are passed
directly to
.I cmp
or
.IR diff "."
If only 1 file is specified, then the files compared are
.I file1
and an uncompressed
.IR file1 ".bz2."
If two files are specified, then they are uncompressed if necessary and fed to
.I cmp
or
.IR diff "."
The exit status from
.I cmp
or
.I diff
is preserved.
.SH "SEE ALSO"
cmp(1), diff(1), bzmore(1), bzless(1), bzgrep(1), bzip2(1)
.SH BUGS
Messages from the
.I cmp
or
.I diff
programs refer to temporary filenames instead of those specified.

71
bzgrep
View File

@ -1,71 +0,0 @@
#!/bin/sh

# Bzgrep wrapped for bzip2,
# adapted from zgrep by Philippe Troin <phil@fifi.org> for Debian GNU/Linux.
## zgrep notice:
## zgrep -- a wrapper around a grep program that decompresses files as needed
## Adapted from a version sent by Charles Levert <charles@comm.polymtl.ca>

PATH="/usr/bin:$PATH"; export PATH

# The grep flavour is chosen from the name this script was invoked under
# (…egrep, …fgrep, or anything else for plain grep).  The EGREP / FGREP /
# GREP environment variables override the program used.
prog=`echo $0 | sed 's|.*/||'`
case "$prog" in
    *egrep) grep=${EGREP-egrep} ;;
    *fgrep) grep=${FGREP-fgrep} ;;
    *)      grep=${GREP-grep}   ;;
esac

# Start from an empty option list so that a variable named "opt" inherited
# from the environment cannot inject options into the grep command line.
opt=""
pat=""
# Collect grep options in $opt and the pattern in $pat; the first operand
# after the pattern ends option processing and starts the file list.
while test $# -ne 0; do
    case "$1" in
        -e | -f) opt="$opt $1"; shift; pat="$1"
                 if test "$grep" = grep; then  # grep is buggy with -e on SVR4
                     grep=egrep
                 fi;;
        -A | -B) opt="$opt $1 $2"; shift;;
        -*)      opt="$opt $1";;
        *)       if test -z "$pat"; then
                     pat="$1"
                 else
                     break;
                 fi;;
    esac
    shift
done

if test -z "$pat"; then
    echo "grep through bzip2 files"
    echo "usage: $prog [grep_options] pattern [files]"
    exit 1
fi

# Detect -l (list matching files only) and -h (no file name prefix) by
# looking for the letters anywhere in the collected option string.
list=0
silent=0
op=`echo "$opt" | sed -e 's/ //g' -e 's/-//g'`
case "$op" in
    *l*) list=1
esac
case "$op" in
    *h*) silent=1
esac

# No file operands: filter standard input.
if test $# -eq 0; then
    bzip2 -cdfq | $grep $opt "$pat"
    exit $?
fi

# Keep a handle on the real standard output; the prefixing branch below
# needs it while its own stdout is captured to read grep's exit status.
exec 3>&1

res=0
for i do
    # Fall back to "$i.bz2" when "$i" itself is not a regular file.
    if test -f "$i"; then :; else if test -f "$i.bz2"; then i="$i.bz2"; fi; fi
    if test $list -eq 1; then
        bzip2 -cdfq "$i" | $grep $opt "$pat" 2>&1 > /dev/null && echo "$i"
        r=$?
    elif test $# -eq 1 -o $silent -eq 1; then
        bzip2 -cdfq "$i" | $grep $opt "$pat"
        r=$?
    else
        # Several files: prefix every match with the file name.  A plain
        # "... | sed; r=$?" would record sed's exit status, hiding "no
        # match" and errors from grep.  Instead grep's status is written to
        # fd 4 (the command substitution's capture pipe) while sed's output
        # is sent to the real stdout saved on fd 3.
        r=`{ { bzip2 -cdfq "$i" | $grep $opt "$pat"; echo $? >&4; } | sed "s|^|${i}:|" >&3; } 4>&1`
    fi
    # The overall exit status is the last non-zero per-file status, if any.
    test "$r" -ne 0 && res="$r"
done
exit $res

View File

@ -1,56 +0,0 @@
.\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org>
.\"for Debian GNU/Linux
.TH BZGREP 1
.SH NAME
bzgrep, bzfgrep, bzegrep \- search possibly bzip2 compressed files for a regular expression
.SH SYNOPSIS
.B bzgrep
[ grep_options ]
.BI [\ -e\ ] " pattern"
.IR filename ".\|.\|."
.br
.B bzegrep
[ egrep_options ]
.BI [\ -e\ ] " pattern"
.IR filename ".\|.\|."
.br
.B bzfgrep
[ fgrep_options ]
.BI [\ -e\ ] " pattern"
.IR filename ".\|.\|."
.SH DESCRIPTION
.IR Bzgrep
is used to invoke the
.I grep
program on bzip2-compressed files. All options specified are passed directly to
.I grep.
If no file is specified, then the standard input is decompressed
if necessary and fed to grep.
Otherwise the given files are uncompressed if necessary and fed to
.I grep.
.PP
If
.I bzgrep
is invoked as
.I bzegrep
or
.I bzfgrep
then
.I egrep
or
.I fgrep
is used instead of
.I grep.
If the GREP environment variable is set,
.I bzgrep
uses it as the
.I grep
program to be invoked. For example:
for sh: GREP=fgrep bzgrep string files
for csh: (setenv GREP fgrep; bzgrep string files)
.SH AUTHOR
Charles Levert (charles@comm.polymtl.ca). Adapted to bzip2 by Philippe
Troin <phil@fifi.org> for Debian GNU/Linux.
.SH "SEE ALSO"
grep(1), egrep(1), fgrep(1), bzdiff(1), bzmore(1), bzless(1), bzip2(1)

453
bzip2.1
View File

@ -1,453 +0,0 @@
.PU
.TH bzip2 1
.SH NAME
bzip2, bunzip2 \- a block-sorting file compressor, v1.0.2
.br
bzcat \- decompresses files to stdout
.br
bzip2recover \- recovers data from damaged bzip2 files
.SH SYNOPSIS
.ll +8
.B bzip2
.RB [ " \-cdfkqstvzVL123456789 " ]
[
.I "filenames \&..."
]
.ll -8
.br
.B bunzip2
.RB [ " \-fkvsVL " ]
[
.I "filenames \&..."
]
.br
.B bzcat
.RB [ " \-s " ]
[
.I "filenames \&..."
]
.br
.B bzip2recover
.I "filename"
.SH DESCRIPTION
.I bzip2
compresses files using the Burrows-Wheeler block sorting
text compression algorithm, and Huffman coding. Compression is
generally considerably better than that achieved by more conventional
LZ77/LZ78-based compressors, and approaches the performance of the PPM
family of statistical compressors.
The command-line options are deliberately very similar to
those of
.I GNU gzip,
but they are not identical.
.I bzip2
expects a list of file names to accompany the
command-line flags. Each file is replaced by a compressed version of
itself, with the name "original_name.bz2".
Each compressed file
has the same modification date, permissions, and, when possible,
ownership as the corresponding original, so that these properties can
be correctly restored at decompression time. File name handling is
naive in the sense that there is no mechanism for preserving original
file names, permissions, ownerships or dates in filesystems which lack
these concepts, or have serious file name length restrictions, such as
MS-DOS.
.I bzip2
and
.I bunzip2
will by default not overwrite existing
files. If you want this to happen, specify the \-f flag.
If no file names are specified,
.I bzip2
compresses from standard
input to standard output. In this case,
.I bzip2
will decline to
write compressed output to a terminal, as this would be entirely
incomprehensible and therefore pointless.
.I bunzip2
(or
.I bzip2 \-d)
decompresses all
specified files. Files which were not created by
.I bzip2
will be detected and ignored, and a warning issued.
.I bzip2
attempts to guess the filename for the decompressed file
from that of the compressed file as follows:
filename.bz2 becomes filename
filename.bz becomes filename
filename.tbz2 becomes filename.tar
filename.tbz becomes filename.tar
anyothername becomes anyothername.out
If the file does not end in one of the recognised endings,
.I .bz2,
.I .bz,
.I .tbz2
or
.I .tbz,
.I bzip2
complains that it cannot
guess the name of the original file, and uses the original name
with
.I .out
appended.
As with compression, supplying no
filenames causes decompression from
standard input to standard output.
.I bunzip2
will correctly decompress a file which is the
concatenation of two or more compressed files. The result is the
concatenation of the corresponding uncompressed files. Integrity
testing (\-t)
of concatenated
compressed files is also supported.
You can also compress or decompress files to the standard output by
giving the \-c flag. Multiple files may be compressed and
decompressed like this. The resulting outputs are fed sequentially to
stdout. Compression of multiple files
in this manner generates a stream
containing multiple compressed file representations. Such a stream
can be decompressed correctly only by
.I bzip2
version 0.9.0 or
later. Earlier versions of
.I bzip2
will stop after decompressing
the first file in the stream.
.I bzcat
(or
.I bzip2 -dc)
decompresses all specified files to
the standard output.
.I bzip2
will read arguments from the environment variables
.I BZIP2
and
.I BZIP,
in that order, and will process them
before any arguments read from the command line. This gives a
convenient way to supply default arguments.
Compression is always performed, even if the compressed
file is slightly
larger than the original. Files of less than about one hundred bytes
tend to get larger, since the compression mechanism has a constant
overhead in the region of 50 bytes. Random data (including the output
of most file compressors) is coded at about 8.05 bits per byte, giving
an expansion of around 0.5%.
As a self-check for your protection,
.I
bzip2
uses 32-bit CRCs to
make sure that the decompressed version of a file is identical to the
original. This guards against corruption of the compressed data, and
against undetected bugs in
.I bzip2
(hopefully very unlikely). The
chance of data corruption going undetected is microscopic, about one
chance in four billion for each file processed. Be aware, though, that
the check occurs upon decompression, so it can only tell you that
something is wrong. It can't help you
recover the original uncompressed
data. You can use
.I bzip2recover
to try to recover data from
damaged files.
Return values: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt
compressed file, 3 for an internal consistency error (eg, bug) which
caused
.I bzip2
to panic.
.SH OPTIONS
.TP
.B \-c --stdout
Compress or decompress to standard output.
.TP
.B \-d --decompress
Force decompression.
.I bzip2,
.I bunzip2
and
.I bzcat
are
really the same program, and the decision about what actions to take is
done on the basis of which name is used. This flag overrides that
mechanism, and forces
.I bzip2
to decompress.
.TP
.B \-z --compress
The complement to \-d: forces compression, regardless of the
invocation name.
.TP
.B \-t --test
Check integrity of the specified file(s), but don't decompress them.
This really performs a trial decompression and throws away the result.
.TP
.B \-f --force
Force overwrite of output files. Normally,
.I bzip2
will not overwrite
existing output files. Also forces
.I bzip2
to break hard links
to files, which it otherwise wouldn't do.
bzip2 normally declines to decompress files which don't have the
correct magic header bytes. If forced (-f), however, it will pass
such files through unmodified. This is how GNU gzip behaves.
.TP
.B \-k --keep
Keep (don't delete) input files during compression
or decompression.
.TP
.B \-s --small
Reduce memory usage, for compression, decompression and testing. Files
are decompressed and tested using a modified algorithm which only
requires 2.5 bytes per block byte. This means any file can be
decompressed in 2300k of memory, albeit at about half the normal speed.
During compression, \-s selects a block size of 200k, which limits
memory use to around the same figure, at the expense of your compression
ratio. In short, if your machine is low on memory (8 megabytes or
less), use \-s for everything. See MEMORY MANAGEMENT below.
.TP
.B \-q --quiet
Suppress non-essential warning messages. Messages pertaining to
I/O errors and other critical events will not be suppressed.
.TP
.B \-v --verbose
Verbose mode -- show the compression ratio for each file processed.
Further \-v's increase the verbosity level, spewing out lots of
information which is primarily of interest for diagnostic purposes.
.TP
.B \-L --license -V --version
Display the software version, license terms and conditions.
.TP
.B \-1 (or \-\-fast) to \-9 (or \-\-best)
Set the block size to 100 k, 200 k .. 900 k when compressing. Has no
effect when decompressing. See MEMORY MANAGEMENT below.
The \-\-fast and \-\-best aliases are primarily for GNU gzip
compatibility. In particular, \-\-fast doesn't make things
significantly faster.
And \-\-best merely selects the default behaviour.
.TP
.B \--
Treats all subsequent arguments as file names, even if they start
with a dash. This is so you can handle files with names beginning
with a dash, for example: bzip2 \-- \-myfilename.
.TP
.B \--repetitive-fast --repetitive-best
These flags are redundant in versions 0.9.5 and above. They provided
some coarse control over the behaviour of the sorting algorithm in
earlier versions, which was sometimes useful. 0.9.5 and above have an
improved algorithm which renders these flags irrelevant.
.SH MEMORY MANAGEMENT
.I bzip2
compresses large files in blocks. The block size affects
both the compression ratio achieved, and the amount of memory needed for
compression and decompression. The flags \-1 through \-9
specify the block size to be 100,000 bytes through 900,000 bytes (the
default) respectively. At decompression time, the block size used for
compression is read from the header of the compressed file, and
.I bunzip2
then allocates itself just enough memory to decompress
the file. Since block sizes are stored in compressed files, it follows
that the flags \-1 to \-9 are irrelevant to and so ignored
during decompression.
Compression and decompression requirements,
in bytes, can be estimated as:
Compression: 400k + ( 8 x block size )
Decompression: 100k + ( 4 x block size ), or
100k + ( 2.5 x block size )
Larger block sizes give rapidly diminishing marginal returns. Most of
the compression comes from the first two or three hundred k of block
size, a fact worth bearing in mind when using
.I bzip2
on small machines.
It is also important to appreciate that the decompression memory
requirement is set at compression time by the choice of block size.
For files compressed with the default 900k block size,
.I bunzip2
will require about 3700 kbytes to decompress. To support decompression
of any file on a 4 megabyte machine,
.I bunzip2
has an option to
decompress using approximately half this amount of memory, about 2300
kbytes. Decompression speed is also halved, so you should use this
option only where necessary. The relevant flag is -s.
In general, try and use the largest block size memory constraints allow,
since that maximises the compression achieved. Compression and
decompression speed are virtually unaffected by block size.
Another significant point applies to files which fit in a single block
-- that means most files you'd encounter using a large block size. The
amount of real memory touched is proportional to the size of the file,
since the file is smaller than a block. For example, compressing a file
20,000 bytes long with the flag -9 will cause the compressor to
allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560
kbytes of it. Similarly, the decompressor will allocate 3700k but only
touch 100k + 20000 * 4 = 180 kbytes.
Here is a table which summarises the maximum memory usage for different
block sizes. Also recorded is the total compressed size for 14 files of
the Calgary Text Compression Corpus totalling 3,141,622 bytes. This
column gives some feel for how compression varies with block size.
These figures tend to understate the advantage of larger block sizes for
larger files, since the Corpus is dominated by smaller files.
Compress Decompress Decompress Corpus
Flag usage usage -s usage Size
-1 1200k 500k 350k 914704
-2 2000k 900k 600k 877703
-3 2800k 1300k 850k 860338
-4 3600k 1700k 1100k 846899
-5 4400k 2100k 1350k 845160
-6 5200k 2500k 1600k 838626
-7 6000k 2900k 1850k 834096
-8 6800k 3300k 2100k 828642
-9 7600k 3700k 2350k 828642
.SH RECOVERING DATA FROM DAMAGED FILES
.I bzip2
compresses files in blocks, usually 900kbytes long. Each
block is handled independently. If a media or transmission error causes
a multi-block .bz2
file to become damaged, it may be possible to
recover data from the undamaged blocks in the file.
The compressed representation of each block is delimited by a 48-bit
pattern, which makes it possible to find the block boundaries with
reasonable certainty. Each block also carries its own 32-bit CRC, so
damaged blocks can be distinguished from undamaged ones.
.I bzip2recover
is a simple program whose purpose is to search for
blocks in .bz2 files, and write each block out into its own .bz2
file. You can then use
.I bzip2
\-t
to test the
integrity of the resulting files, and decompress those which are
undamaged.
.I bzip2recover
takes a single argument, the name of the damaged file,
and writes a number of files "rec00001file.bz2",
"rec00002file.bz2", etc, containing the extracted blocks.
The output filenames are designed so that the use of
wildcards in subsequent processing -- for example,
"bzip2 -dc rec*file.bz2 > recovered_data" -- processes the files in
the correct order.
.I bzip2recover
should be of most use dealing with large .bz2
files, as these will contain many blocks. It is clearly
futile to use it on damaged single-block files, since a
damaged block cannot be recovered. If you wish to minimise
any potential data loss through media or transmission errors,
you might consider compressing with a smaller
block size.
.SH PERFORMANCE NOTES
The sorting phase of compression gathers together similar strings in the
file. Because of this, files containing very long runs of repeated
symbols, like "aabaabaabaab ..." (repeated several hundred times) may
compress more slowly than normal. Versions 0.9.5 and above fare much
better than previous versions in this respect. The ratio between
worst-case and average-case compression time is in the region of 10:1.
For previous versions, this figure was more like 100:1. You can use the
\-vvvv option to monitor progress in great detail, if you want.
Decompression speed is unaffected by these phenomena.
.I bzip2
usually allocates several megabytes of memory to operate
in, and then charges all over it in a fairly random fashion. This means
that performance, both for compressing and decompressing, is largely
determined by the speed at which your machine can service cache misses.
Because of this, small changes to the code to reduce the miss rate have
been observed to give disproportionately large performance improvements.
I imagine
.I bzip2
will perform best on machines with very large caches.
.SH CAVEATS
I/O error messages are not as helpful as they could be.
.I bzip2
tries hard to detect I/O errors and exit cleanly, but the details of
what the problem is sometimes seem rather misleading.
This manual page pertains to version 1.0.2 of
.I bzip2.
Compressed data created by this version is entirely forwards and
backwards compatible with the previous public releases, versions
0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, but with the following
exception: 0.9.0 and above can correctly decompress multiple
concatenated compressed files. 0.1pl2 cannot do this; it will stop
after decompressing just the first file in the stream.
.I bzip2recover
versions prior to this one, 1.0.2, used 32-bit integers to represent
bit positions in compressed files, so they could not handle compressed
files more than 512 megabytes long. Versions 1.0.2 and above use
64-bit ints on some platforms which support them (GNU supported
targets, and Windows). To establish whether or not bzip2recover was
built with such a limitation, run it without arguments. In any event
you can build yourself an unlimited version if you can recompile it
with MaybeUInt64 set to be an unsigned 64-bit integer.
.SH AUTHOR
Julian Seward, jseward@acm.org.
http://sources.redhat.com/bzip2
The ideas embodied in
.I bzip2
are due to (at least) the following
people: Michael Burrows and David Wheeler (for the block sorting
transformation), David Wheeler (again, for the Huffman coder), Peter
Fenwick (for the structured coding model in the original
.I bzip,
and many refinements), and Alistair Moffat, Radford Neal and Ian Witten
(for the arithmetic coder in the original
.I bzip).
I am much
indebted for their help, support and advice. See the manual in the
source distribution for pointers to sources of documentation. Christian