GNO port source code (from 2003)
This commit is contained in:
parent
18bf65b3db
commit
160de28119
220
Makefile
220
Makefile
|
@ -1,192 +1,94 @@
|
|||
# Makefile for bunzip2 for GNO (for use with dmake)
|
||||
# Based on Unix Makefile for bzip2
|
||||
# Modified for GNO by Stephen Heumann
|
||||
|
||||
SHELL=/bin/sh
|
||||
# ORCA/C 2.1.0 may need more than 8 megabytes of RAM to compile decompress.c
|
||||
# with full optimization enabled. Thus, this makefile can only
|
||||
# be used as is on an emulated system with 14 megabyte RAM support.
|
||||
|
||||
# To assist in cross-compiling
|
||||
CC=gcc
|
||||
AR=ar
|
||||
RANLIB=ranlib
|
||||
# Uncomment this if make doesn't have the $CC variable set appropriately
|
||||
# CC=occ
|
||||
RM=cp -p rm
|
||||
|
||||
LDFLAGS=
|
||||
|
||||
# Suitably paranoid flags to avoid bugs in gcc-2.7
|
||||
BIGFILES=-D_FILE_OFFSET_BITS=64
|
||||
CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES)
|
||||
# The "-I /usr/include" shouldn't be needed but seemed to fix problems for me
|
||||
CFLAGS=-a0 -w -O -I /usr/include
|
||||
|
||||
NOROOTFLAG=-r
|
||||
|
||||
# Where you want it installed when you do 'make install'
|
||||
PREFIX=/usr
|
||||
PREFIX=/usr/local
|
||||
|
||||
|
||||
OBJS= blocksort.o \
|
||||
OBJS= stristr.o \
|
||||
huffman.o \
|
||||
crctable.o \
|
||||
randtable.o \
|
||||
compress.o \
|
||||
decompress.o \
|
||||
bzlib.o
|
||||
|
||||
all: libbz2.a bzip2 bzip2recover test
|
||||
all: bunzip2 bzip2recover test
|
||||
|
||||
bzip2: libbz2.a bzip2.o
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -o bzip2 bzip2.o -L. -lbz2
|
||||
bunzip2: bzip2.o $(OBJS)
|
||||
$(CC) -o bunzip2 bunzip2.rez
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) bzip2.o $(OBJS) -o bunzip2
|
||||
|
||||
bzip2recover: bzip2recover.o
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -o bzip2recover bzip2recover.o
|
||||
|
||||
libbz2.a: $(OBJS)
|
||||
rm -f libbz2.a
|
||||
$(AR) cq libbz2.a $(OBJS)
|
||||
@if ( test -f $(RANLIB) -o -f /usr/bin/ranlib -o \
|
||||
-f /bin/ranlib -o -f /usr/ccs/bin/ranlib ) ; then \
|
||||
echo $(RANLIB) libbz2.a ; \
|
||||
$(RANLIB) libbz2.a ; \
|
||||
fi
|
||||
$(CC) -o bzip2recover bzip2recover.rez
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) bzip2recover.o -o bzip2recover
|
||||
|
||||
check: test
|
||||
test: bzip2
|
||||
test: bunzip2
|
||||
@cat words1
|
||||
./bzip2 -1 < sample1.ref > sample1.rb2
|
||||
./bzip2 -2 < sample2.ref > sample2.rb2
|
||||
./bzip2 -3 < sample3.ref > sample3.rb2
|
||||
./bzip2 -d < sample1.bz2 > sample1.tst
|
||||
./bzip2 -d < sample2.bz2 > sample2.tst
|
||||
./bzip2 -ds < sample3.bz2 > sample3.tst
|
||||
cmp sample1.bz2 sample1.rb2
|
||||
cmp sample2.bz2 sample2.rb2
|
||||
cmp sample3.bz2 sample3.rb2
|
||||
./bunzip2 -dk < sample1.bz2 > sample1.tst
|
||||
./bunzip2 -dk < sample2.bz2 > sample2.tst
|
||||
./bunzip2 -dks < sample3.bz2 > sample3.tst
|
||||
@cat words2
|
||||
cmp sample1.tst sample1.ref
|
||||
cmp sample2.tst sample2.ref
|
||||
cmp sample3.tst sample3.ref
|
||||
@cat words3
|
||||
|
||||
install: bzip2 bzip2recover
|
||||
if ( test ! -d $(PREFIX)/bin ) ; then mkdir -p $(PREFIX)/bin ; fi
|
||||
if ( test ! -d $(PREFIX)/lib ) ; then mkdir -p $(PREFIX)/lib ; fi
|
||||
if ( test ! -d $(PREFIX)/man ) ; then mkdir -p $(PREFIX)/man ; fi
|
||||
if ( test ! -d $(PREFIX)/man/man1 ) ; then mkdir -p $(PREFIX)/man/man1 ; fi
|
||||
if ( test ! -d $(PREFIX)/include ) ; then mkdir -p $(PREFIX)/include ; fi
|
||||
cp -f bzip2 $(PREFIX)/bin/bzip2
|
||||
cp -f bzip2 $(PREFIX)/bin/bunzip2
|
||||
cp -f bzip2 $(PREFIX)/bin/bzcat
|
||||
install: bunzip2 bzip2recover test justinstall
|
||||
|
||||
justinstall:
|
||||
# This should install bunzip2 for GNO under /usr/local
|
||||
mkdir $(PREFIX)/bin >& .null
|
||||
mkdir $(PREFIX)/man >& .null
|
||||
mkdir $(PREFIX)/man/man1 >& .null
|
||||
cp -f bunzip2 $(PREFIX)/bin/bunzip2
|
||||
cp -f bzip2recover $(PREFIX)/bin/bzip2recover
|
||||
chmod a+x $(PREFIX)/bin/bzip2
|
||||
chmod a+x $(PREFIX)/bin/bunzip2
|
||||
chmod a+x $(PREFIX)/bin/bzcat
|
||||
chmod a+x $(PREFIX)/bin/bzip2recover
|
||||
cp -f bzip2.1 $(PREFIX)/man/man1
|
||||
chmod a+r $(PREFIX)/man/man1/bzip2.1
|
||||
cp -f bzlib.h $(PREFIX)/include
|
||||
chmod a+r $(PREFIX)/include/bzlib.h
|
||||
cp -f libbz2.a $(PREFIX)/lib
|
||||
chmod a+r $(PREFIX)/lib/libbz2.a
|
||||
cp -f bzgrep $(PREFIX)/bin/bzgrep
|
||||
ln $(PREFIX)/bin/bzgrep $(PREFIX)/bin/bzegrep
|
||||
ln $(PREFIX)/bin/bzgrep $(PREFIX)/bin/bzfgrep
|
||||
chmod a+x $(PREFIX)/bin/bzgrep
|
||||
cp -f bzmore $(PREFIX)/bin/bzmore
|
||||
ln $(PREFIX)/bin/bzmore $(PREFIX)/bin/bzless
|
||||
chmod a+x $(PREFIX)/bin/bzmore
|
||||
cp -f bzdiff $(PREFIX)/bin/bzdiff
|
||||
ln $(PREFIX)/bin/bzdiff $(PREFIX)/bin/bzcmp
|
||||
chmod a+x $(PREFIX)/bin/bzdiff
|
||||
cp -f bzgrep.1 bzmore.1 bzdiff.1 $(PREFIX)/man/man1
|
||||
chmod a+r $(PREFIX)/man/man1/bzgrep.1
|
||||
chmod a+r $(PREFIX)/man/man1/bzmore.1
|
||||
chmod a+r $(PREFIX)/man/man1/bzdiff.1
|
||||
echo ".so man1/bzgrep.1" > $(PREFIX)/man/man1/bzegrep.1
|
||||
echo ".so man1/bzgrep.1" > $(PREFIX)/man/man1/bzfgrep.1
|
||||
echo ".so man1/bzmore.1" > $(PREFIX)/man/man1/bzless.1
|
||||
echo ".so man1/bzdiff.1" > $(PREFIX)/man/man1/bzcmp.1
|
||||
cp -f bunzip2.1 $(PREFIX)/man/man1/bunzip2.1
|
||||
cp -f bzip2recover.1 $(PREFIX)/man/man1/bzip2recover.1
|
||||
cp -f bzcat.1 $(PREFIX)/man/man1/bzcat.1
|
||||
@cat words4
|
||||
|
||||
distclean: clean
|
||||
clean:
|
||||
rm -f *.o libbz2.a bzip2 bzip2recover \
|
||||
sample1.rb2 sample2.rb2 sample3.rb2 \
|
||||
clean:
|
||||
$(RM) -f *.o *.a *.sym *.root bunzip2 bzip2recover \
|
||||
sample1.tst sample2.tst sample3.tst
|
||||
|
||||
blocksort.o: blocksort.c
|
||||
@cat words0
|
||||
$(CC) $(CFLAGS) -c blocksort.c
|
||||
huffman.o: huffman.c
|
||||
$(CC) $(CFLAGS) -c huffman.c
|
||||
crctable.o: crctable.c
|
||||
$(CC) $(CFLAGS) -c crctable.c
|
||||
randtable.o: randtable.c
|
||||
$(CC) $(CFLAGS) -c randtable.c
|
||||
compress.o: compress.c
|
||||
$(CC) $(CFLAGS) -c compress.c
|
||||
decompress.o: decompress.c
|
||||
$(CC) $(CFLAGS) -c decompress.c
|
||||
bzlib.o: bzlib.c
|
||||
$(CC) $(CFLAGS) -c bzlib.c
|
||||
bzip2.o: bzip2.c
|
||||
$(CC) $(CFLAGS) -c bzip2.c
|
||||
stristr.o: stristr.c
|
||||
$(CC) $(CFLAGS) $(NOROOTFLAG) -c stristr.c
|
||||
huffman.o: huffman.c bzlib_private.h
|
||||
$(CC) $(CFLAGS) $(NOROOTFLAG) -c huffman.c
|
||||
crctable.o: crctable.c bzlib_private.h
|
||||
$(CC) $(CFLAGS) $(NOROOTFLAG) -c crctable.c
|
||||
randtable.o: randtable.c bzlib_private.h
|
||||
$(CC) $(CFLAGS) $(NOROOTFLAG) -c randtable.c
|
||||
decompress.o: decompress.c bzlib_private.h
|
||||
$(CC) $(CFLAGS) $(NOROOTFLAG) -c decompress.c
|
||||
bzlib.o: bzlib.c bzlib_private.h
|
||||
$(CC) $(CFLAGS) $(NOROOTFLAG) -c bzlib.c
|
||||
bzip2.o: bzip2.c bzlib.h
|
||||
$(CC) $(CFLAGS) -s 2048 -C1 -c bzip2.c
|
||||
# $(CC) $(CFLAGS) -C1 -D __STACK_CHECK__ -c bzip2.c
|
||||
bzip2recover.o: bzip2recover.c
|
||||
$(CC) $(CFLAGS) -c bzip2recover.c
|
||||
$(CC) $(CFLAGS) -s 1024 -c bzip2recover.c
|
||||
# $(CC) $(CFLAGS) -D __STACK_CHECK__ -c bzip2recover.c
|
||||
bzlib_private.h: bzlib.h
|
||||
|
||||
DISTNAME=bzip2-1.0.2
|
||||
tarfile:
|
||||
rm -f $(DISTNAME)
|
||||
ln -sf . $(DISTNAME)
|
||||
tar cvf $(DISTNAME).tar \
|
||||
$(DISTNAME)/blocksort.c \
|
||||
$(DISTNAME)/huffman.c \
|
||||
$(DISTNAME)/crctable.c \
|
||||
$(DISTNAME)/randtable.c \
|
||||
$(DISTNAME)/compress.c \
|
||||
$(DISTNAME)/decompress.c \
|
||||
$(DISTNAME)/bzlib.c \
|
||||
$(DISTNAME)/bzip2.c \
|
||||
$(DISTNAME)/bzip2recover.c \
|
||||
$(DISTNAME)/bzlib.h \
|
||||
$(DISTNAME)/bzlib_private.h \
|
||||
$(DISTNAME)/Makefile \
|
||||
$(DISTNAME)/manual.texi \
|
||||
$(DISTNAME)/manual.ps \
|
||||
$(DISTNAME)/manual.pdf \
|
||||
$(DISTNAME)/LICENSE \
|
||||
$(DISTNAME)/bzip2.1 \
|
||||
$(DISTNAME)/bzip2.1.preformatted \
|
||||
$(DISTNAME)/bzip2.txt \
|
||||
$(DISTNAME)/words0 \
|
||||
$(DISTNAME)/words1 \
|
||||
$(DISTNAME)/words2 \
|
||||
$(DISTNAME)/words3 \
|
||||
$(DISTNAME)/sample1.ref \
|
||||
$(DISTNAME)/sample2.ref \
|
||||
$(DISTNAME)/sample3.ref \
|
||||
$(DISTNAME)/sample1.bz2 \
|
||||
$(DISTNAME)/sample2.bz2 \
|
||||
$(DISTNAME)/sample3.bz2 \
|
||||
$(DISTNAME)/dlltest.c \
|
||||
$(DISTNAME)/*.html \
|
||||
$(DISTNAME)/README \
|
||||
$(DISTNAME)/README.COMPILATION.PROBLEMS \
|
||||
$(DISTNAME)/CHANGES \
|
||||
$(DISTNAME)/libbz2.def \
|
||||
$(DISTNAME)/libbz2.dsp \
|
||||
$(DISTNAME)/dlltest.dsp \
|
||||
$(DISTNAME)/makefile.msc \
|
||||
$(DISTNAME)/Y2K_INFO \
|
||||
$(DISTNAME)/unzcrash.c \
|
||||
$(DISTNAME)/spewG.c \
|
||||
$(DISTNAME)/mk251.c \
|
||||
$(DISTNAME)/bzdiff \
|
||||
$(DISTNAME)/bzdiff.1 \
|
||||
$(DISTNAME)/bzmore \
|
||||
$(DISTNAME)/bzmore.1 \
|
||||
$(DISTNAME)/bzgrep \
|
||||
$(DISTNAME)/bzgrep.1 \
|
||||
$(DISTNAME)/Makefile-libbz2_so
|
||||
gzip -v $(DISTNAME).tar
|
||||
|
||||
# For rebuilding the manual from sources on my RedHat 7.2 box
|
||||
manual: manual.ps manual.pdf manual.html
|
||||
|
||||
manual.ps: manual.texi
|
||||
tex manual.texi
|
||||
dvips -o manual.ps manual.dvi
|
||||
|
||||
manual.pdf: manual.ps
|
||||
ps2pdf manual.ps
|
||||
|
||||
manual.html: manual.texi
|
||||
texi2html -split_chapter manual.texi
|
||||
chtyp:
|
||||
chtyp -l cc *.c *.h
|
||||
|
|
|
@ -1,44 +0,0 @@
|
|||
|
||||
# This Makefile builds a shared version of the library,
|
||||
# libbz2.so.1.0.2, with soname libbz2.so.1.0,
|
||||
# at least on x86-Linux (RedHat 7.2),
|
||||
# with gcc-2.96 20000731 (Red Hat Linux 7.1 2.96-98).
|
||||
# Please see the README file for some
|
||||
# important info about building the library like this.
|
||||
|
||||
SHELL=/bin/sh
|
||||
CC=gcc
|
||||
BIGFILES=-D_FILE_OFFSET_BITS=64
|
||||
CFLAGS=-fpic -fPIC -Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES)
|
||||
|
||||
OBJS= blocksort.o \
|
||||
huffman.o \
|
||||
crctable.o \
|
||||
randtable.o \
|
||||
compress.o \
|
||||
decompress.o \
|
||||
bzlib.o
|
||||
|
||||
all: $(OBJS)
|
||||
$(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.2 $(OBJS)
|
||||
$(CC) $(CFLAGS) -o bzip2-shared bzip2.c libbz2.so.1.0.2
|
||||
rm -f libbz2.so.1.0
|
||||
ln -s libbz2.so.1.0.2 libbz2.so.1.0
|
||||
|
||||
clean:
|
||||
rm -f $(OBJS) bzip2.o libbz2.so.1.0.2 libbz2.so.1.0 bzip2-shared
|
||||
|
||||
blocksort.o: blocksort.c
|
||||
$(CC) $(CFLAGS) -c blocksort.c
|
||||
huffman.o: huffman.c
|
||||
$(CC) $(CFLAGS) -c huffman.c
|
||||
crctable.o: crctable.c
|
||||
$(CC) $(CFLAGS) -c crctable.c
|
||||
randtable.o: randtable.c
|
||||
$(CC) $(CFLAGS) -c randtable.c
|
||||
compress.o: compress.c
|
||||
$(CC) $(CFLAGS) -c compress.c
|
||||
decompress.o: decompress.c
|
||||
$(CC) $(CFLAGS) -c decompress.c
|
||||
bzlib.o: bzlib.c
|
||||
$(CC) $(CFLAGS) -c bzlib.c
|
|
@ -1,130 +0,0 @@
|
|||
|
||||
bzip2-1.0 should compile without problems on the vast majority of
|
||||
platforms. Using the supplied Makefile, I've built and tested it
|
||||
myself for x86-linux, sparc-solaris, alpha-linux, x86-cygwin32 and
|
||||
alpha-tru64unix. With makefile.msc, Visual C++ 6.0 and nmake, you can
|
||||
build a native Win32 version too. Large file support seems to work
|
||||
correctly on at least alpha-tru64unix and x86-cygwin32 (on Windows
|
||||
2000).
|
||||
|
||||
When I say "large file" I mean a file of size 2,147,483,648 (2^31)
|
||||
bytes or above. Many older OSs can't handle files above this size,
|
||||
but many newer ones can. Large files are pretty huge -- most files
|
||||
you'll encounter are not Large Files.
|
||||
|
||||
Earlier versions of bzip2 (0.1, 0.9.0, 0.9.5) compiled on a wide
|
||||
variety of platforms without difficulty, and I hope this version will
|
||||
continue in that tradition. However, in order to support large files,
|
||||
I've had to include the define -D_FILE_OFFSET_BITS=64 in the Makefile.
|
||||
This can cause problems.
|
||||
|
||||
The technique of adding -D_FILE_OFFSET_BITS=64 to get large file
|
||||
support is, as far as I know, the Recommended Way to get correct large
|
||||
file support. For more details, see the Large File Support
|
||||
Specification, published by the Large File Summit, at
|
||||
http://www.sas.com/standard/large.file/
|
||||
|
||||
As a general comment, if you get compilation errors which you think
|
||||
are related to large file support, try removing the above define from
|
||||
the Makefile, ie, delete the line
|
||||
BIGFILES=-D_FILE_OFFSET_BITS=64
|
||||
from the Makefile, and do 'make clean ; make'. This will give you a
|
||||
version of bzip2 without large file support, which, for most
|
||||
applications, is probably not a problem.
|
||||
|
||||
Alternatively, try some of the platform-specific hints listed below.
|
||||
|
||||
You can use the spewG.c program to generate huge files to test bzip2's
|
||||
large file support, if you are feeling paranoid. Be aware though that
|
||||
any compilation problems which affect bzip2 will also affect spewG.c,
|
||||
alas.
|
||||
|
||||
|
||||
Known problems as of 1.0pre8:
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* HP/UX 10.20 and 11.00, using gcc (2.7.2.3 and 2.95.2): A large
|
||||
number of warnings appear, including the following:
|
||||
|
||||
/usr/include/sys/resource.h: In function `getrlimit':
|
||||
/usr/include/sys/resource.h:168:
|
||||
warning: implicit declaration of function `__getrlimit64'
|
||||
/usr/include/sys/resource.h: In function `setrlimit':
|
||||
/usr/include/sys/resource.h:170:
|
||||
warning: implicit declaration of function `__setrlimit64'
|
||||
|
||||
This would appear to be a problem with large file support, header
|
||||
files and gcc. gcc may or may not give up at this point. If it
|
||||
fails, you might be able to improve matters by adding
|
||||
-D__STDC_EXT__=1
|
||||
to the BIGFILES variable in the Makefile (ie, change its definition
|
||||
to
|
||||
BIGFILES=-D_FILE_OFFSET_BITS=64 -D__STDC_EXT__=1
|
||||
|
||||
Even if gcc does produce a binary which appears to work (ie passes
|
||||
its self-tests), you might want to test it to see if it works properly
|
||||
on large files.
|
||||
|
||||
|
||||
* HP/UX 10.20 and 11.00, using HP's cc compiler.
|
||||
|
||||
No specific problems for this combination, except that you'll need to
|
||||
specify the -Ae flag, and zap the gcc-specific stuff
|
||||
-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce.
|
||||
You should retain -D_FILE_OFFSET_BITS=64 in order to get large
|
||||
file support -- which is reported to work ok for this HP/UX + cc
|
||||
combination.
|
||||
|
||||
|
||||
* SunOS 4.1.X.
|
||||
|
||||
Amazingly, there are still people out there using this venerable old
|
||||
banger. I shouldn't be too rude -- I started life on SunOS, and
|
||||
it was a pretty darn good OS, way back then. Anyway:
|
||||
|
||||
SunOS doesn't seem to have strerror(), so you'll have to use
|
||||
perror(), perhaps by adding this (warning: UNTESTED CODE):
|
||||
|
||||
char* strerror ( int errnum )
|
||||
{
|
||||
if (errnum < 0 || errnum >= sys_nerr)
|
||||
return "Unknown error";
|
||||
else
|
||||
return sys_errlist[errnum];
|
||||
}
|
||||
|
||||
Or you could comment out the relevant calls to strerror; they're
|
||||
not mission-critical. Or you could upgrade to Solaris. Ha ha ha!
|
||||
(what?? you think I've got Bad Attitude?)
|
||||
|
||||
|
||||
* Making a shared library on Solaris. (Not really a compilation
|
||||
problem, but many people ask ...)
|
||||
|
||||
Firstly, if you have Solaris 8, either you have libbz2.so already
|
||||
on your system, or you can install it from the Solaris CD.
|
||||
|
||||
Secondly, be aware that there are potential naming conflicts
|
||||
between the .so file supplied with Solaris 8, and the .so file
|
||||
which Makefile-libbz2_so will make. Makefile-libbz2_so creates
|
||||
a .so which has the names which I intend to be "official" as
|
||||
of version 1.0.0 and onwards. Unfortunately, the .so in
|
||||
Solaris 8 appeared before I decided on the final names, so
|
||||
the two libraries are incompatible. We have since communicated
|
||||
and I hope that the problems will have been solved in the next
|
||||
version of Solaris, whenever that might appear.
|
||||
|
||||
All that said: you might be able to get somewhere
|
||||
by finding the line in Makefile-libbz2_so which says
|
||||
|
||||
$(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.2 $(OBJS)
|
||||
|
||||
and replacing with
|
||||
|
||||
$(CC) -G -shared -o libbz2.so.1.0.2 -h libbz2.so.1.0 $(OBJS)
|
||||
|
||||
If gcc objects to the combination -fpic -fPIC, get rid of
|
||||
the second one, leaving just "-fpic".
|
||||
|
||||
|
||||
That's the end of the currently known compilation problems.
|
|
@ -0,0 +1,158 @@
|
|||
README FOR BUNZIP2 FOR GNO
|
||||
==========================
|
||||
This is a port of the bunzip2 archive decompression program to the GNO
|
||||
environment on the Apple IIgs. It is based on Julian Seward's original
|
||||
bzip2 program, but it includes only the decompression (and testing)
|
||||
functionality; compression is disabled. This archive also includes the
|
||||
bzip2recover program, which may allow you to recover some data from a
|
||||
partially corrupted bzip2 archive file. These programs correspond to
|
||||
Julian Seward's bzip2 version 1.0.2.
|
||||
|
||||
|
||||
REQUIREMENTS
|
||||
============
|
||||
Bunzip2 requires a ROM 01 or ROM 3 Apple IIgs (or an emulator thereof)
|
||||
running IIgs System Software 6.0.1 and GNO 2.0.6 (or later).
|
||||
|
||||
Bunzip2 also needs considerable memory. It will not be able to decompress
|
||||
most archives if you have less than 4 megabytes of RAM. On 4-5 MB
|
||||
systems, you will likely have to specify the -s option to minimize memory
|
||||
usage; on an 8MB (or 14MB) system, this will probably not be necessary,
|
||||
unless you have a very large number of system extensions or other programs
|
||||
running under GNO. See the manpage for more details on memory usage.
|
||||
If bunzip2 gives you an out-of-memory error the first time you run it, try
|
||||
again. The first attempt may have caused the system to reorganize memory
|
||||
and purge unneeded data, freeing up enough space to run bunzip2 on the
|
||||
second attempt.
|
||||
|
||||
Bunzip2 will also benefit from an accelerator, although one is obviously
|
||||
not required. Even with an accelerator, it can be rather slow when
|
||||
decompressing larger archives. Be prepared to wait a very long time
|
||||
(several hours or even longer) for bunzip2 to finish decompressing large
|
||||
bzip2 archives.
|
||||
|
||||
|
||||
INSTALLATION
|
||||
============
|
||||
To install bunzip2, simply run "dmake justinstall". Alternatively, you can
|
||||
install it manually: copy the bunzip2 and bzip2recover programs to your GNO
|
||||
installation's /usr/local/bin directory, and copy the bunzip2.1, bzcat.1,
|
||||
and bzip2recover.1 manpages to the /usr/local/man/man1 directory.
|
||||
|
||||
After installing bunzip2, you should read the manpage for directions on how
|
||||
to use it. You can put the following line in your gshrc file so you can use
|
||||
'bzcat' as documented in the manpage:
|
||||
|
||||
alias bzcat "bunzip2 -c"
|
||||
|
||||
|
||||
NOTES ON THE SOURCE CODE
|
||||
========================
|
||||
[If you just want to use bunzip2, you do not need to read this section.]
|
||||
|
||||
Please note that a couple source files use non-ProDOS compatible filenames.
|
||||
If you do not have an HFS or AppleShare partition available, these can
|
||||
easily be changed to fit ProDOS conventions.
|
||||
|
||||
I had to make several changes to the bzip2 program when porting it to GNO.
|
||||
The code is not very good-looking, but it does compile without warnings.
|
||||
|
||||
First, I disabled the compression functionality and set up the program to
|
||||
decompress by default (and I renamed the binary to 'bunzip2' to reflect
|
||||
this). The compression functionality is not very important on the GS, since
|
||||
bzip2 is not a very good choice for compressing GS-specific data; ShrinkIt
|
||||
will be much faster and preserves GS-specific file attributes. Even if you
|
||||
want to create archives for use on UNIX-like systems, compress or gzip is
|
||||
a better choice, and both are already available under GNO. For these
|
||||
reasons, and because it reduced the amount of code that I had to modify, I
|
||||
removed the compression functionality from bunzip2.
|
||||
|
||||
Other major changes to the code fell into several categories:
|
||||
(1) Type sizes: Most of the code used defines for types such as Int32, making
|
||||
it easy to adapt to the GS's 16-bit ints. The interface between the
|
||||
bzip2 program and code designed to be compiled as 'libbzip2,' however,
|
||||
assumes that int is 32 bits, so I had to modify it to use the appropriate
|
||||
integer types on the GS. There were also silent assumptions in some
|
||||
other areas that native ints are 32 bits, and I had to identify and
|
||||
correct these. There were also variables specified as 'Int32' even
|
||||
though 16 bits were sufficient to represent their possible range of
|
||||
values; when I noticed these variables, I changed them appropriately.
|
||||
|
||||
(2) ORCA/C compiler limitations: ORCA/C in its 'small mode' (the only one
|
||||
supported by the GNO libraries) places a 64k restriction on the size
|
||||
of data structures that can be addressed as arrays. This is a problem
|
||||
with bunzip2, which allocates and uses multi-megabyte data structures.
|
||||
To work around this, I changed array-style references to these data
|
||||
structures to use pointer arithmetic instead, working around the
|
||||
limitation (e.g., I changed references to 'a[b]' to '*(a+b)'). I also
|
||||
changed large local variables to be static or dynamically allocated
|
||||
in order to avoid excessive stack usage.
|
||||
|
||||
(3) ORCA/C compiler bugs: In several cases ORCA/C 2.1.0 generated bad code
|
||||
at the maximum optimization level. Most instances where reduced
|
||||
optimization levels are used are necessary to work around bugs encountered
|
||||
when using the disabled optimizations. Also, the size of the main
|
||||
decompression function in decompress.c stresses ORCA/C. I modified
|
||||
the GET_BITS macro to reduce the code size of the BZ2_decompress function
|
||||
by making some of the code into a separate function. If this is not done
|
||||
or if optimization is not enabled (increasing the compiled code size
|
||||
as compared to when optimization is enabled), the compiler will crash,
|
||||
give an error, or generate bad object code that gives linker errors.
|
||||
|
||||
(4) Modifications to work well with GNO and GS/OS: These include setting the
|
||||
output filetype and disabling newline translation in GNO's stdio
|
||||
implementation. I also set the stack sizes of the programs to
|
||||
appropriate values and enabled stack checking for the small recursive
|
||||
segment of the program (although it shouldn't actually pose any problem).
|
||||
Additionally, I changed filename operations to be case-insensitive,
|
||||
reflecting the case-insensitive nature of filesystems in the Apple IIgs.
|
||||
|
||||
I made most modifications conditional on the __appleiigs__, __ORCAC__, or
|
||||
__GNO__ macros. Which macro I used gives some hint at the reason for each
|
||||
modification, although all or none should be used to produce a working
|
||||
executable (changes conditionalized on one macro may depend on those
|
||||
conditionalized on another).
|
||||
|
||||
|
||||
COMPILING
|
||||
=========
|
||||
The included Makefile can be used with dmake, occ, and ORCA/C 2.1.0, all of
|
||||
which should be installed in your GNO 2.0.6 installation. You will also need
|
||||
a copy of the lsaneglue library (which is missing from the default GNO 2.0.6
|
||||
installation) to be present in your GNO /lib directory. Run 'dmake bunzip2'
|
||||
to build the main program or 'dmake test' to build both programs and run a
|
||||
simple test to ensure that bunzip2 is working correctly.
|
||||
|
||||
There are some special considerations necessary when compiling the file
|
||||
decompress.c. As noted above, it must be compiled with (nearly) full
|
||||
optimization to compile properly. To compile it with full optimization using
|
||||
ORCA/C 2.1.0, however, requires more than 8MB of memory. Thus, decompress.c
|
||||
(and by extension the bunzip2 program as a whole) can only be compiled on an
|
||||
emulator with 14MB memory support enabled. The only emulators that presently
|
||||
support this are Bernie ][ The Rescue and Sweet16. I have included a
|
||||
prebuilt object file (decompress.o) so that you can rebuild bunzip2 with
|
||||
changes to other source files using a real IIgs.
|
||||
|
||||
|
||||
AREAS FOR IMPROVEMENT
|
||||
=====================
|
||||
* Resource forks and GS/OS filetypes are not supported. This is not a major
|
||||
problem; other programs such as ShrinkIt should be used for GS-specific
|
||||
archives.
|
||||
* Compression could be reenabled. This would require adapting the compression
|
||||
and block sorting routines to work properly under GNO on the GS.
|
||||
* Some or all of the program could be rewritten in assembly language. This
|
||||
would improve its performance by some amount, although I don't know how
|
||||
much. It also might reduce memory usage. This would require a full
|
||||
understanding of the BWT compression and decompression algorithms used in
|
||||
bzip2, which I do not presently possess.
|
||||
|
||||
|
||||
SUPPORT
|
||||
=======
|
||||
I can be contacted by email at sheumann@myrealbox.com . Please contact me,
|
||||
rather than Julian Seward, about any problems that you are experiencing only
|
||||
in the GNO version of bunzip2.
|
||||
|
||||
--
|
||||
Stephen Heumann <sheumann@myrealbox.com>
|
34
Y2K_INFO
34
Y2K_INFO
|
@ -1,34 +0,0 @@
|
|||
|
||||
Y2K status of bzip2 and libbzip2, versions 0.1, 0.9.0 and 0.9.5
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Informally speaking:
|
||||
bzip2 is a compression program built on top of libbzip2,
|
||||
a library which does the real work of compression and
|
||||
decompression. As far as I am aware, libbzip2 does not have
|
||||
any date-related code at all.
|
||||
|
||||
bzip2 itself copies dates from source to destination files
|
||||
when compressing or decompressing, using the 'stat' and 'utime'
|
||||
UNIX system calls. It doesn't examine, manipulate or store the
|
||||
dates in any way. So as far as I can see, there shouldn't be any
|
||||
problem with bzip2 providing 'stat' and 'utime' work correctly
|
||||
on your system.
|
||||
|
||||
On non-unix platforms (those for which BZ_UNIX in bzip2.c is
|
||||
not set to 1), bzip2 doesn't even do the date copying.
|
||||
|
||||
Overall, informally speaking, I don't think bzip2 or libbzip2
|
||||
have a Y2K problem.
|
||||
|
||||
Formally speaking:
|
||||
I am not prepared to offer you any assurance whatsoever
|
||||
regarding Y2K issues in my software. You alone assume the
|
||||
entire risk of using the software. The disclaimer of liability
|
||||
in the LICENSE file in the bzip2 source distribution continues
|
||||
to apply on this issue as with every other issue pertaining
|
||||
to the software.
|
||||
|
||||
Julian Seward
|
||||
Cambridge, UK
|
||||
25 August 1999
|
1141
blocksort.c
1141
blocksort.c
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,362 @@
|
|||
.TH BUNZIP2 1 "9 June 2003"
|
||||
.SH NAME
|
||||
bunzip2 \- a block-sorting file decompressor, v1.0.2gs1
|
||||
.br
|
||||
bzcat \- decompresses files to stdout
|
||||
.br
|
||||
bzip2recover \- recovers data from damaged bzip2 files
|
||||
|
||||
.SH SYNOPSIS
|
||||
.br
|
||||
.B bunzip2
|
||||
.RB [ " \-fkvsVL " ]
|
||||
[
|
||||
.I "filenames \&..."
|
||||
]
|
||||
.br
|
||||
.B bzcat
|
||||
.RB [ " \-s " ]
|
||||
[
|
||||
.I "filenames \&..."
|
||||
]
|
||||
.br
|
||||
.B bzip2recover
|
||||
.I "filename"
|
||||
|
||||
.SH DESCRIPTION
|
||||
.I bunzip2
|
||||
decompresses files created by
|
||||
.I bzip2
|
||||
using the Burrows-Wheeler block sorting
|
||||
text compression algorithm, and Huffman coding.
|
||||
.I bzip2
|
||||
generally achieves
|
||||
considerably better compression than that achieved by more conventional
|
||||
LZ77/LZ78-based compressors, and approaches the performance of the PPM
|
||||
family of statistical compressors.
|
||||
.LP
|
||||
The command-line options are deliberately very similar to
|
||||
those of
|
||||
.I GNU
|
||||
.I gunzip,
|
||||
but they are not identical.
|
||||
.LP
|
||||
.I bunzip2
|
||||
will by default not overwrite existing
|
||||
files. If you want this to happen, specify the \-f flag.
|
||||
.LP
|
||||
.I bunzip2
|
||||
decompresses all specified files. Files which were not created by
|
||||
.I bzip2
|
||||
will be detected and ignored, and a warning issued.
|
||||
.I bunzip2
|
||||
attempts to guess the filename for the decompressed file
|
||||
from that of the compressed file as follows:
|
||||
.LP
|
||||
.nf
|
||||
filename.bz2 becomes filename
|
||||
filename.bz becomes filename
|
||||
filename.tbz2 becomes filename.tar
|
||||
filename.tbz becomes filename.tar
|
||||
anyothername becomes anyothername.out
|
||||
.fi
|
||||
.LP
|
||||
If the file does not end in one of the recognised endings,
|
||||
.I .bz2,
|
||||
.I .bz,
|
||||
.I .tbz2
|
||||
or
|
||||
.I .tbz,
|
||||
.I bunzip2
|
||||
complains that it cannot
|
||||
guess the name of the original file, and uses the original name
|
||||
with
|
||||
.I .out
|
||||
appended.
|
||||
.LP
|
||||
Supplying no filenames causes decompression from
|
||||
standard input to standard output.
|
||||
.LP
|
||||
File name handling is
|
||||
naive in the sense that there is no mechanism for preserving original
|
||||
file names, permissions, ownerships or dates in operating systems or
|
||||
filesystems which lack these concepts, or have serious file name length
|
||||
restrictions, such as MS-DOS or GS/OS.
|
||||
.LP
|
||||
.I bunzip2
|
||||
will correctly decompress a file which is the
|
||||
concatenation of two or more compressed files. The result is the
|
||||
concatenation of the corresponding uncompressed files. Integrity
|
||||
testing (\-t)
|
||||
of concatenated
|
||||
compressed files is also supported.
|
||||
.LP
|
||||
You can also decompress files to the standard output by
|
||||
giving the \-c flag. Multiple files may be
|
||||
decompressed like this. The resulting outputs are fed sequentially to stdout.
|
||||
.LP
|
||||
.I bzcat
|
||||
(or
|
||||
.I bunzip2
|
||||
.I \-c)
|
||||
decompresses all specified files to
|
||||
the standard output.
|
||||
.LP
|
||||
.I bunzip2
|
||||
will read arguments from the environment variables
|
||||
.I BZIP2
|
||||
and
|
||||
.I BZIP,
|
||||
in that order, and will process them
|
||||
before any arguments read from the command line. This gives a
|
||||
convenient way to supply default arguments.
|
||||
.LP
|
||||
As a self-check for your protection,
|
||||
.I bzip2
|
||||
and
|
||||
.I bunzip2
|
||||
use 32-bit CRCs to
|
||||
make sure that the decompressed version of a file is identical to the
|
||||
original. This guards against corruption of the compressed data, and
|
||||
against undetected bugs in
|
||||
.I bzip2
|
||||
and
|
||||
.I bunzip2
|
||||
(hopefully very unlikely). The
|
||||
chances of data corruption going undetected are microscopic, about one
|
||||
chance in four billion for each file processed. Be aware, though, that
|
||||
the check occurs upon decompression, so it can only tell you that
|
||||
something is wrong. It can't help you
|
||||
recover the original uncompressed
|
||||
data. You can use
|
||||
.I bzip2recover
|
||||
to try to recover data from
|
||||
damaged files.
|
||||
.LP
|
||||
This manual page pertains to version 1.0.2gs1 of
|
||||
.I bunzip2.
|
||||
It is fully compatible with compressed data created with all of the previous
|
||||
public releases of bzip2, versions
|
||||
0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, as well as version 1.0.2.
|
||||
.LP
|
||||
Return values: 0 for a normal exit, 1 for environmental problems (file
|
||||
not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt
|
||||
compressed file, 3 for an internal consistency error (eg, bug) which
|
||||
caused
|
||||
.I bunzip2
|
||||
to panic.
|
||||
.LP
|
||||
.SH OPTIONS
|
||||
.IP "\fB\-c\fP \fB\--stdout\fP"
|
||||
Decompress to standard output.
|
||||
|
||||
.IP "\fB\-d\fP \fB\--decompress\fP"
|
||||
Force decompression. This flag is unnecessary on bunzip2 for GNO,
|
||||
since it always decompresses.
|
||||
|
||||
.IP "\fB\-t\fP \fB\--test\fP"
|
||||
Check integrity of the specified file(s), but don't decompress them.
|
||||
This really performs a trial decompression and throws away the result.
|
||||
|
||||
.IP "\fB\-f\fP \fB\--force\fP"
|
||||
Force overwrite of output files. Normally,
|
||||
.I bunzip2
|
||||
will not overwrite
|
||||
existing output files.
|
||||
.sp
|
||||
.I bunzip2
|
||||
normally declines to decompress files which don't have the
|
||||
correct magic header bytes. If forced (-f), however, it will pass
|
||||
such files through unmodified. This is how GNU gzip behaves.
|
||||
|
||||
.IP "\fB\-k\fP \fB\--keep\fP"
|
||||
Keep (don't delete) input files during decompression.
|
||||
|
||||
.IP "\fB\-s\fP \fB\--small\fP"
|
||||
Reduce memory usage, for decompression and testing. Files
|
||||
are decompressed and tested using a modified algorithm which only
|
||||
requires 2.5 bytes per block byte. This means any file can be
|
||||
decompressed in 2300k of memory, albeit at about half the normal speed.
|
||||
.sp
|
||||
In short, if your machine is low on memory (5 megabytes or
|
||||
less), you will probably need to use \-s. See MEMORY MANAGEMENT below.
|
||||
|
||||
.IP "\fB\-q\fP \fB\--quiet\fP"
|
||||
Suppress non-essential warning messages. Messages pertaining to
|
||||
I/O errors and other critical events will not be suppressed.
|
||||
|
||||
.IP "\fB\-v\fP \fB\--verbose\fP"
|
||||
Verbose mode -- show the compression ratio for each file processed.
|
||||
Further \-v's increase the verbosity level, spewing out lots of
|
||||
information which is primarily of interest for diagnostic purposes.
|
||||
|
||||
.IP "\fB\-L\fP \fB\--license\fP \fB\-V\fP \fB\--version\fP"
|
||||
Display the software version, license terms and conditions.
|
||||
|
||||
.IP "\fB\--\fP"
|
||||
Treats all subsequent arguments as file names, even if they start
|
||||
with a dash. This is so you can handle files with names beginning
|
||||
with a dash, for example: bunzip2 \-- \-myfilename.
|
||||
.LP
|
||||
.SH MEMORY MANAGEMENT
|
||||
.I bzip2
|
||||
compresses large files in blocks. The block size affects
|
||||
both the compression ratio achieved, and the amount of memory needed for
|
||||
compression and decompression. The block size can be specified
|
||||
to be 100,000 bytes through 900,000 bytes (the
|
||||
default). At decompression time, the block size used for
|
||||
compression is read from the header of the compressed file, and
|
||||
.I bunzip2
|
||||
then allocates itself just enough memory to decompress
|
||||
the file.
|
||||
.LP
|
||||
Decompression requirements, in bytes, can be estimated as:
|
||||
.LP
|
||||
.nf
|
||||
100k + ( 4 x block size ), or
|
||||
100k + ( 2.5 x block size ) if using \-s
|
||||
.fi
|
||||
.LP
|
||||
For files compressed with the default 900k block size,
|
||||
.I bunzip2
|
||||
will require about 3700 kbytes to decompress. To support decompression
|
||||
of any file on a 4 megabyte machine,
|
||||
.I bunzip2
|
||||
has an option to
|
||||
decompress using approximately half this amount of memory, about 2300
|
||||
kbytes. Decompression speed is also halved, so you should use this
|
||||
option only where necessary. The relevant flag is -s.
|
||||
.LP
|
||||
Decompression speeds are virtually unaffected by block size.
|
||||
.LP
|
||||
Another significant point applies to files which fit in a single block
|
||||
-- that means most files you'd encounter using a large block size. The
|
||||
amount of real memory touched is proportional to the size of the file,
|
||||
since the file is smaller than a block. For example, compressing a file
|
||||
20,000 bytes long with a 900k block size will cause the decompressor to
|
||||
allocate 3700k but only touch 100k + 20000 * 4 = 180 kbytes
|
||||
when decompressing it.
|
||||
.LP
|
||||
Here is a table which summarises the maximum memory usage for different
|
||||
block sizes. Also recorded is the total compressed size for 14 files of
|
||||
the Calgary Text Compression Corpus totalling 3,141,622 bytes. This
|
||||
column gives some feel for how compression varies with block size.
|
||||
These figures tend to understate the advantage of larger block sizes for
|
||||
larger files, since the Corpus is dominated by smaller files.
|
||||
.LP
|
||||
.nf
|
||||
Block Decompress Decompress Corpus
|
||||
Size usage -s usage Size
|
||||
.fi
|
||||
.LP
|
||||
.nf
|
||||
100k 500k 350k 914704
|
||||
200k 900k 600k 877703
|
||||
300k 1300k 850k 860338
|
||||
400k 1700k 1100k 846899
|
||||
500k 2100k 1350k 845160
|
||||
600k 2500k 1600k 838626
|
||||
700k 2900k 1850k 834096
|
||||
800k 3300k 2100k 828642
|
||||
900k 3700k 2350k 828642
|
||||
.fi
|
||||
.LP
|
||||
.SH RECOVERING DATA FROM DAMAGED FILES
|
||||
.I bzip2
|
||||
compresses files in blocks, usually 900kbytes long. Each
|
||||
block is handled independently. If a media or transmission error causes
|
||||
a multi-block .bz2
|
||||
file to become damaged, it may be possible to
|
||||
recover data from the undamaged blocks in the file.
|
||||
.LP
|
||||
The compressed representation of each block is delimited by a 48-bit
|
||||
pattern, which makes it possible to find the block boundaries with
|
||||
reasonable certainty. Each block also carries its own 32-bit CRC, so
|
||||
damaged blocks can be distinguished from undamaged ones.
|
||||
.LP
|
||||
.I bzip2recover
|
||||
is a simple program whose purpose is to search for blocks in .bz2 files,
|
||||
and write each block out into its own .bz2 file. You can then use
|
||||
.I bunzip2
|
||||
\-t
|
||||
to test the
|
||||
integrity of the resulting files, and decompress those which are
|
||||
undamaged.
|
||||
.LP
|
||||
.I bzip2recover
|
||||
takes a single argument, the name of the damaged file,
|
||||
and writes a number of files named "rec0001file.bz2",
|
||||
"rec0002file.bz2", etc, containing the extracted blocks.
|
||||
The output filenames are designed so that the use of
|
||||
wildcards in subsequent processing -- for example,
|
||||
"bunzip2 -c rec*file.bz2 > recovered_data" -- processes the files in
|
||||
the correct order.
|
||||
.LP
|
||||
.I bzip2recover
|
||||
should be of most use dealing with large .bz2
|
||||
files, as these will contain many blocks. It is clearly
|
||||
futile to use it on damaged single-block files, since a
|
||||
damaged block cannot be recovered. If you wish to minimise
|
||||
any potential data loss through media or transmission errors,
|
||||
you might consider compressing with a smaller
|
||||
block size.
|
||||
.LP
|
||||
.SH PERFORMANCE NOTES
|
||||
.I bunzip2
|
||||
usually allocates several megabytes of memory to operate
|
||||
in, and then charges all over it in a fairly random fashion. This means
|
||||
that performance is largely determined by the speed at which your machine can
|
||||
access main memory or (if you have a caching accelerator) serve cache misses.
|
||||
Because of this, small changes to the code to reduce the miss rate have
|
||||
been observed to give disproportionately large performance improvements.
|
||||
I imagine that
|
||||
.I bunzip2
|
||||
will perform best on machines with very large caches.
|
||||
.LP
|
||||
.SH CAVEATS
|
||||
I/O error messages are not as helpful as they could be.
|
||||
.I bunzip2
|
||||
tries hard to detect I/O errors and exit cleanly, but the details of
|
||||
what the problem is sometimes seem rather misleading.
|
||||
.LP
|
||||
.I bzip2recover
|
||||
for GNO uses 32-bit integers to represent bit positions in compressed files,
|
||||
so it cannot handle compressed files more than 512 megabytes long.
|
||||
.LP
|
||||
|
||||
.SH AUTHOR
|
||||
Julian Seward, jseward@acm.org.
|
||||
.LP
|
||||
http://sources.redhat.com/bzip2
|
||||
.LP
|
||||
The ideas embodied in
|
||||
.I bzip2
|
||||
are due to (at least) the following
|
||||
people: Michael Burrows and David Wheeler (for the block sorting
|
||||
transformation), David Wheeler (again, for the Huffman coder), Peter
|
||||
Fenwick (for the structured coding model in the original
|
||||
.I bzip,
|
||||
and many refinements), and Alistair Moffat, Radford Neal and Ian Witten
|
||||
(for the arithmetic coder in the original
|
||||
.I bzip).
|
||||
I am much
|
||||
indebted for their help, support and advice. See the manual in the
|
||||
source distribution for pointers to sources of documentation. Christian
|
||||
von Roques encouraged me to look for faster sorting algorithms, so as to
|
||||
speed up compression. Bela Lubkin encouraged me to improve the
|
||||
worst-case compression performance. Many people sent patches, helped
|
||||
with portability problems, lent machines, gave advice and were generally
|
||||
helpful.
|
||||
.LP
|
||||
This version of
|
||||
.I bunzip2
|
||||
for GNO has been ported by Stephen Heumann <sheumann@myrealbox.com> from
|
||||
Julian Seward's
|
||||
.I bzip2
|
||||
version 1.0.2 for other platforms.
|
||||
.LP
|
||||
This program contains material from the ORCA/C Run-Time Libraries,
|
||||
copyright 1987-1996 by Byte Works, Inc. Used with permission.
|
||||
.LP
|
||||
It also incorporates a public domain stristr routine by Fred Cole,
|
||||
Bob Stout, and Greg Thayer, which was obtained from http://www.snippets.org .
|
|
@ -0,0 +1,13 @@
|
|||
Name: bunzip2
|
||||
Version: 1.0.2
|
||||
Shell: GNO/ME
|
||||
Author: Stephen Heumann (GNO port of original code by Julian Seward)
|
||||
Contact: sheumann@myrealbox.com
|
||||
Where: /usr/local/bin
|
||||
FTP: ftp.gno.org
|
||||
|
||||
Decompression program for files compressed in the bzip2 format. Based
|
||||
on Julian Seward's bzip2 program, but only supports file decompression and
|
||||
testing, not compression. Can also be used as bzcat, writing decompressed
|
||||
data to stdout. Also includes bzip2recover program for restoring data
|
||||
from partially corrupted bzip2 archives.
|
|
@ -0,0 +1,15 @@
|
|||
#include "/lang/orca/libraries/rinclude/Types.Rez"
|
||||
|
||||
resource rVersion (0x1, purgeable3, nocrossbank) {
|
||||
|
||||
{ 1, 0, 2, /* version 1.0.2 */
|
||||
release, /* development|alpha|beta|final|release */
|
||||
0 /* non-final release number */
|
||||
},
|
||||
verUS, /* country code -- only some are avail */
|
||||
"bunzip2", /* name */
|
||||
/* _Very_ brief description. Check "file info" */
|
||||
/* shown in the Finder to see if it's too long */
|
||||
/* Note that \n is used to separate lines here. */
|
||||
"Bzip2 archive decompression program\n"
|
||||
};
|
76
bzdiff
76
bzdiff
|
@ -1,76 +0,0 @@
|
|||
#!/bin/sh
|
||||
# sh is buggy on RS/6000 AIX 3.2. Replace above line with #!/bin/ksh
|
||||
|
||||
# Bzcmp/diff wrapped for bzip2,
|
||||
# adapted from zdiff by Philippe Troin <phil@fifi.org> for Debian GNU/Linux.
|
||||
|
||||
# Bzcmp and bzdiff are used to invoke the cmp or the diff pro-
|
||||
# gram on compressed files. All options specified are passed
|
||||
# directly to cmp or diff. If only 1 file is specified, then
|
||||
# the files compared are file1 and an uncompressed file1.bz2.
|
||||
# If two files are specified, then they are uncompressed (if
|
||||
# necessary) and fed to cmp or diff. The exit status from cmp
|
||||
# or diff is preserved.
|
||||
|
||||
PATH="/usr/bin:$PATH"; export PATH
|
||||
prog=`echo $0 | sed 's|.*/||'`
|
||||
case "$prog" in
|
||||
*cmp) comp=${CMP-cmp} ;;
|
||||
*) comp=${DIFF-diff} ;;
|
||||
esac
|
||||
|
||||
OPTIONS=
|
||||
FILES=
|
||||
for ARG
|
||||
do
|
||||
case "$ARG" in
|
||||
-*) OPTIONS="$OPTIONS $ARG";;
|
||||
*) if test -f "$ARG"; then
|
||||
FILES="$FILES $ARG"
|
||||
else
|
||||
echo "${prog}: $ARG not found or not a regular file"
|
||||
exit 1
|
||||
fi ;;
|
||||
esac
|
||||
done
|
||||
if test -z "$FILES"; then
|
||||
echo "Usage: $prog [${comp}_options] file [file]"
|
||||
exit 1
|
||||
fi
|
||||
tmp=`tempfile -d /tmp -p bz` || {
|
||||
echo 'cannot create a temporary file' >&2
|
||||
exit 1
|
||||
}
|
||||
set $FILES
|
||||
if test $# -eq 1; then
|
||||
FILE=`echo "$1" | sed 's/.bz2$//'`
|
||||
bzip2 -cd "$FILE.bz2" | $comp $OPTIONS - "$FILE"
|
||||
STAT="$?"
|
||||
|
||||
elif test $# -eq 2; then
|
||||
case "$1" in
|
||||
*.bz2)
|
||||
case "$2" in
|
||||
*.bz2)
|
||||
F=`echo "$2" | sed 's|.*/||;s|.bz2$||'`
|
||||
bzip2 -cdfq "$2" > $tmp
|
||||
bzip2 -cdfq "$1" | $comp $OPTIONS - $tmp
|
||||
STAT="$?"
|
||||
/bin/rm -f $tmp;;
|
||||
|
||||
*) bzip2 -cdfq "$1" | $comp $OPTIONS - "$2"
|
||||
STAT="$?";;
|
||||
esac;;
|
||||
*) case "$2" in
|
||||
*.bz2)
|
||||
bzip2 -cdfq "$2" | $comp $OPTIONS "$1" -
|
||||
STAT="$?";;
|
||||
*) $comp $OPTIONS "$1" "$2"
|
||||
STAT="$?";;
|
||||
esac;;
|
||||
esac
|
||||
exit "$STAT"
|
||||
else
|
||||
echo "Usage: $prog [${comp}_options] file [file]"
|
||||
exit 1
|
||||
fi
|
47
bzdiff.1
47
bzdiff.1
|
@ -1,47 +0,0 @@
|
|||
\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org>
|
||||
\"for Debian GNU/Linux
|
||||
.TH BZDIFF 1
|
||||
.SH NAME
|
||||
bzcmp, bzdiff \- compare bzip2 compressed files
|
||||
.SH SYNOPSIS
|
||||
.B bzcmp
|
||||
[ cmp_options ] file1
|
||||
[ file2 ]
|
||||
.br
|
||||
.B bzdiff
|
||||
[ diff_options ] file1
|
||||
[ file2 ]
|
||||
.SH DESCRIPTION
|
||||
.I Bzcmp
|
||||
and
|
||||
.I bzdiff
|
||||
are used to invoke the
|
||||
.I cmp
|
||||
or the
|
||||
.I diff
|
||||
program on bzip2 compressed files. All options specified are passed
|
||||
directly to
|
||||
.I cmp
|
||||
or
|
||||
.IR diff "."
|
||||
If only 1 file is specified, then the files compared are
|
||||
.I file1
|
||||
and an uncompressed
|
||||
.IR file1 ".bz2."
|
||||
If two files are specified, then they are uncompressed if necessary and fed to
|
||||
.I cmp
|
||||
or
|
||||
.IR diff "."
|
||||
The exit status from
|
||||
.I cmp
|
||||
or
|
||||
.I diff
|
||||
is preserved.
|
||||
.SH "SEE ALSO"
|
||||
cmp(1), diff(1), bzmore(1), bzless(1), bzgrep(1), bzip2(1)
|
||||
.SH BUGS
|
||||
Messages from the
|
||||
.I cmp
|
||||
or
|
||||
.I diff
|
||||
programs refer to temporary filenames instead of those specified.
|
71
bzgrep
71
bzgrep
|
@ -1,71 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Bzgrep wrapped for bzip2,
|
||||
# adapted from zgrep by Philippe Troin <phil@fifi.org> for Debian GNU/Linux.
|
||||
## zgrep notice:
|
||||
## zgrep -- a wrapper around a grep program that decompresses files as needed
|
||||
## Adapted from a version sent by Charles Levert <charles@comm.polymtl.ca>
|
||||
|
||||
PATH="/usr/bin:$PATH"; export PATH
|
||||
|
||||
prog=`echo $0 | sed 's|.*/||'`
|
||||
case "$prog" in
|
||||
*egrep) grep=${EGREP-egrep} ;;
|
||||
*fgrep) grep=${FGREP-fgrep} ;;
|
||||
*) grep=${GREP-grep} ;;
|
||||
esac
|
||||
pat=""
|
||||
while test $# -ne 0; do
|
||||
case "$1" in
|
||||
-e | -f) opt="$opt $1"; shift; pat="$1"
|
||||
if test "$grep" = grep; then # grep is buggy with -e on SVR4
|
||||
grep=egrep
|
||||
fi;;
|
||||
-A | -B) opt="$opt $1 $2"; shift;;
|
||||
-*) opt="$opt $1";;
|
||||
*) if test -z "$pat"; then
|
||||
pat="$1"
|
||||
else
|
||||
break;
|
||||
fi;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
if test -z "$pat"; then
|
||||
echo "grep through bzip2 files"
|
||||
echo "usage: $prog [grep_options] pattern [files]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
list=0
|
||||
silent=0
|
||||
op=`echo "$opt" | sed -e 's/ //g' -e 's/-//g'`
|
||||
case "$op" in
|
||||
*l*) list=1
|
||||
esac
|
||||
case "$op" in
|
||||
*h*) silent=1
|
||||
esac
|
||||
|
||||
if test $# -eq 0; then
|
||||
bzip2 -cdfq | $grep $opt "$pat"
|
||||
exit $?
|
||||
fi
|
||||
|
||||
res=0
|
||||
for i do
|
||||
if test -f "$i"; then :; else if test -f "$i.bz2"; then i="$i.bz2"; fi; fi
|
||||
if test $list -eq 1; then
|
||||
bzip2 -cdfq "$i" | $grep $opt "$pat" 2>&1 > /dev/null && echo $i
|
||||
r=$?
|
||||
elif test $# -eq 1 -o $silent -eq 1; then
|
||||
bzip2 -cdfq "$i" | $grep $opt "$pat"
|
||||
r=$?
|
||||
else
|
||||
bzip2 -cdfq "$i" | $grep $opt "$pat" | sed "s|^|${i}:|"
|
||||
r=$?
|
||||
fi
|
||||
test "$r" -ne 0 && res="$r"
|
||||
done
|
||||
exit $res
|
56
bzgrep.1
56
bzgrep.1
|
@ -1,56 +0,0 @@
|
|||
\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org>
|
||||
\"for Debian GNU/Linux
|
||||
.TH BZGREP 1
|
||||
.SH NAME
|
||||
bzgrep, bzfgrep, bzegrep \- search possibly bzip2 compressed files for a regular expression
|
||||
.SH SYNOPSIS
|
||||
.B bzgrep
|
||||
[ grep_options ]
|
||||
.BI [\ -e\ ] " pattern"
|
||||
.IR filename ".\|.\|."
|
||||
.br
|
||||
.B bzegrep
|
||||
[ egrep_options ]
|
||||
.BI [\ -e\ ] " pattern"
|
||||
.IR filename ".\|.\|."
|
||||
.br
|
||||
.B bzfgrep
|
||||
[ fgrep_options ]
|
||||
.BI [\ -e\ ] " pattern"
|
||||
.IR filename ".\|.\|."
|
||||
.SH DESCRIPTION
|
||||
.IR Bzgrep
|
||||
is used to invoke the
|
||||
.I grep
|
||||
on bzip2-compressed files. All options specified are passed directly to
|
||||
.I grep.
|
||||
If no file is specified, then the standard input is decompressed
|
||||
if necessary and fed to grep.
|
||||
Otherwise the given files are uncompressed if necessary and fed to
|
||||
.I grep.
|
||||
.PP
|
||||
If
|
||||
.I bzgrep
|
||||
is invoked as
|
||||
.I bzegrep
|
||||
or
|
||||
.I bzfgrep
|
||||
then
|
||||
.I egrep
|
||||
or
|
||||
.I fgrep
|
||||
is used instead of
|
||||
.I grep.
|
||||
If the GREP environment variable is set,
|
||||
.I bzgrep
|
||||
uses it as the
|
||||
.I grep
|
||||
program to be invoked. For example:
|
||||
|
||||
for sh: GREP=fgrep bzgrep string files
|
||||
for csh: (setenv GREP fgrep; bzgrep string files)
|
||||
.SH AUTHOR
|
||||
Charles Levert (charles@comm.polymtl.ca). Adapted to bzip2 by Philippe
|
||||
Troin <phil@fifi.org> for Debian GNU/Linux.
|
||||
.SH "SEE ALSO"
|
||||
grep(1), egrep(1), fgrep(1), bzdiff(1), bzmore(1), bzless(1), bzip2(1)
|
453
bzip2.1
453
bzip2.1
|
@ -1,453 +0,0 @@
|
|||
.PU
|
||||
.TH bzip2 1
|
||||
.SH NAME
|
||||
bzip2, bunzip2 \- a block-sorting file compressor, v1.0.2
|
||||
.br
|
||||
bzcat \- decompresses files to stdout
|
||||
.br
|
||||
bzip2recover \- recovers data from damaged bzip2 files
|
||||
|
||||
.SH SYNOPSIS
|
||||
.ll +8
|
||||
.B bzip2
|
||||
.RB [ " \-cdfkqstvzVL123456789 " ]
|
||||
[
|
||||
.I "filenames \&..."
|
||||
]
|
||||
.ll -8
|
||||
.br
|
||||
.B bunzip2
|
||||
.RB [ " \-fkvsVL " ]
|
||||
[
|
||||
.I "filenames \&..."
|
||||
]
|
||||
.br
|
||||
.B bzcat
|
||||
.RB [ " \-s " ]
|
||||
[
|
||||
.I "filenames \&..."
|
||||
]
|
||||
.br
|
||||
.B bzip2recover
|
||||
.I "filename"
|
||||
|
||||
.SH DESCRIPTION
|
||||
.I bzip2
|
||||
compresses files using the Burrows-Wheeler block sorting
|
||||
text compression algorithm, and Huffman coding. Compression is
|
||||
generally considerably better than that achieved by more conventional
|
||||
LZ77/LZ78-based compressors, and approaches the performance of the PPM
|
||||
family of statistical compressors.
|
||||
|
||||
The command-line options are deliberately very similar to
|
||||
those of
|
||||
.I GNU gzip,
|
||||
but they are not identical.
|
||||
|
||||
.I bzip2
|
||||
expects a list of file names to accompany the
|
||||
command-line flags. Each file is replaced by a compressed version of
|
||||
itself, with the name "original_name.bz2".
|
||||
Each compressed file
|
||||
has the same modification date, permissions, and, when possible,
|
||||
ownership as the corresponding original, so that these properties can
|
||||
be correctly restored at decompression time. File name handling is
|
||||
naive in the sense that there is no mechanism for preserving original
|
||||
file names, permissions, ownerships or dates in filesystems which lack
|
||||
these concepts, or have serious file name length restrictions, such as
|
||||
MS-DOS.
|
||||
|
||||
.I bzip2
|
||||
and
|
||||
.I bunzip2
|
||||
will by default not overwrite existing
|
||||
files. If you want this to happen, specify the \-f flag.
|
||||
|
||||
If no file names are specified,
|
||||
.I bzip2
|
||||
compresses from standard
|
||||
input to standard output. In this case,
|
||||
.I bzip2
|
||||
will decline to
|
||||
write compressed output to a terminal, as this would be entirely
|
||||
incomprehensible and therefore pointless.
|
||||
|
||||
.I bunzip2
|
||||
(or
|
||||
.I bzip2 \-d)
|
||||
decompresses all
|
||||
specified files. Files which were not created by
|
||||
.I bzip2
|
||||
will be detected and ignored, and a warning issued.
|
||||
.I bzip2
|
||||
attempts to guess the filename for the decompressed file
|
||||
from that of the compressed file as follows:
|
||||
|
||||
filename.bz2 becomes filename
|
||||
filename.bz becomes filename
|
||||
filename.tbz2 becomes filename.tar
|
||||
filename.tbz becomes filename.tar
|
||||
anyothername becomes anyothername.out
|
||||
|
||||
If the file does not end in one of the recognised endings,
|
||||
.I .bz2,
|
||||
.I .bz,
|
||||
.I .tbz2
|
||||
or
|
||||
.I .tbz,
|
||||
.I bzip2
|
||||
complains that it cannot
|
||||
guess the name of the original file, and uses the original name
|
||||
with
|
||||
.I .out
|
||||
appended.
|
||||
|
||||
As with compression, supplying no
|
||||
filenames causes decompression from
|
||||
standard input to standard output.
|
||||
|
||||
.I bunzip2
|
||||
will correctly decompress a file which is the
|
||||
concatenation of two or more compressed files. The result is the
|
||||
concatenation of the corresponding uncompressed files. Integrity
|
||||
testing (\-t)
|
||||
of concatenated
|
||||
compressed files is also supported.
|
||||
|
||||
You can also compress or decompress files to the standard output by
|
||||
giving the \-c flag. Multiple files may be compressed and
|
||||
decompressed like this. The resulting outputs are fed sequentially to
|
||||
stdout. Compression of multiple files
|
||||
in this manner generates a stream
|
||||
containing multiple compressed file representations. Such a stream
|
||||
can be decompressed correctly only by
|
||||
.I bzip2
|
||||
version 0.9.0 or
|
||||
later. Earlier versions of
|
||||
.I bzip2
|
||||
will stop after decompressing
|
||||
the first file in the stream.
|
||||
|
||||
.I bzcat
|
||||
(or
|
||||
.I bzip2 -dc)
|
||||
decompresses all specified files to
|
||||
the standard output.
|
||||
|
||||
.I bzip2
|
||||
will read arguments from the environment variables
|
||||
.I BZIP2
|
||||
and
|
||||
.I BZIP,
|
||||
in that order, and will process them
|
||||
before any arguments read from the command line. This gives a
|
||||
convenient way to supply default arguments.
|
||||
|
||||
Compression is always performed, even if the compressed
|
||||
file is slightly
|
||||
larger than the original. Files of less than about one hundred bytes
|
||||
tend to get larger, since the compression mechanism has a constant
|
||||
overhead in the region of 50 bytes. Random data (including the output
|
||||
of most file compressors) is coded at about 8.05 bits per byte, giving
|
||||
an expansion of around 0.5%.
|
||||
|
||||
As a self-check for your protection,
|
||||
.I
|
||||
bzip2
|
||||
uses 32-bit CRCs to
|
||||
make sure that the decompressed version of a file is identical to the
|
||||
original. This guards against corruption of the compressed data, and
|
||||
against undetected bugs in
|
||||
.I bzip2
|
||||
(hopefully very unlikely). The
|
||||
chances of data corruption going undetected are microscopic, about one
|
||||
chance in four billion for each file processed. Be aware, though, that
|
||||
the check occurs upon decompression, so it can only tell you that
|
||||
something is wrong. It can't help you
|
||||
recover the original uncompressed
|
||||
data. You can use
|
||||
.I bzip2recover
|
||||
to try to recover data from
|
||||
damaged files.
|
||||
|
||||
Return values: 0 for a normal exit, 1 for environmental problems (file
|
||||
not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt
|
||||
compressed file, 3 for an internal consistency error (eg, bug) which
|
||||
caused
|
||||
.I bzip2
|
||||
to panic.
|
||||
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.B \-c --stdout
|
||||
Compress or decompress to standard output.
|
||||
.TP
|
||||
.B \-d --decompress
|
||||
Force decompression.
|
||||
.I bzip2,
|
||||
.I bunzip2
|
||||
and
|
||||
.I bzcat
|
||||
are
|
||||
really the same program, and the decision about what actions to take is
|
||||
done on the basis of which name is used. This flag overrides that
|
||||
mechanism, and forces
|
||||
.I bzip2
|
||||
to decompress.
|
||||
.TP
|
||||
.B \-z --compress
|
||||
The complement to \-d: forces compression, regardless of the
|
||||
invocation name.
|
||||
.TP
|
||||
.B \-t --test
|
||||
Check integrity of the specified file(s), but don't decompress them.
|
||||
This really performs a trial decompression and throws away the result.
|
||||
.TP
|
||||
.B \-f --force
|
||||
Force overwrite of output files. Normally,
|
||||
.I bzip2
|
||||
will not overwrite
|
||||
existing output files. Also forces
|
||||
.I bzip2
|
||||
to break hard links
|
||||
to files, which it otherwise wouldn't do.
|
||||
|
||||
bzip2 normally declines to decompress files which don't have the
|
||||
correct magic header bytes. If forced (-f), however, it will pass
|
||||
such files through unmodified. This is how GNU gzip behaves.
|
||||
.TP
|
||||
.B \-k --keep
|
||||
Keep (don't delete) input files during compression
|
||||
or decompression.
|
||||
.TP
|
||||
.B \-s --small
|
||||
Reduce memory usage, for compression, decompression and testing. Files
|
||||
are decompressed and tested using a modified algorithm which only
|
||||
requires 2.5 bytes per block byte. This means any file can be
|
||||
decompressed in 2300k of memory, albeit at about half the normal speed.
|
||||
|
||||
During compression, \-s selects a block size of 200k, which limits
|
||||
memory use to around the same figure, at the expense of your compression
|
||||
ratio. In short, if your machine is low on memory (8 megabytes or
|
||||
less), use \-s for everything. See MEMORY MANAGEMENT below.
|
||||
.TP
|
||||
.B \-q --quiet
|
||||
Suppress non-essential warning messages. Messages pertaining to
|
||||
I/O errors and other critical events will not be suppressed.
|
||||
.TP
|
||||
.B \-v --verbose
|
||||
Verbose mode -- show the compression ratio for each file processed.
|
||||
Further \-v's increase the verbosity level, spewing out lots of
|
||||
information which is primarily of interest for diagnostic purposes.
|
||||
.TP
|
||||
.B \-L --license -V --version
|
||||
Display the software version, license terms and conditions.
|
||||
.TP
|
||||
.B \-1 (or \-\-fast) to \-9 (or \-\-best)
|
||||
Set the block size to 100 k, 200 k .. 900 k when compressing. Has no
|
||||
effect when decompressing. See MEMORY MANAGEMENT below.
|
||||
The \-\-fast and \-\-best aliases are primarily for GNU gzip
|
||||
compatibility. In particular, \-\-fast doesn't make things
|
||||
significantly faster.
|
||||
And \-\-best merely selects the default behaviour.
|
||||
.TP
|
||||
.B \--
|
||||
Treats all subsequent arguments as file names, even if they start
|
||||
with a dash. This is so you can handle files with names beginning
|
||||
with a dash, for example: bzip2 \-- \-myfilename.
|
||||
.TP
|
||||
.B \--repetitive-fast --repetitive-best
|
||||
These flags are redundant in versions 0.9.5 and above. They provided
|
||||
some coarse control over the behaviour of the sorting algorithm in
|
||||
earlier versions, which was sometimes useful. 0.9.5 and above have an
|
||||
improved algorithm which renders these flags irrelevant.
|
||||
|
||||
.SH MEMORY MANAGEMENT
|
||||
.I bzip2
|
||||
compresses large files in blocks. The block size affects
|
||||
both the compression ratio achieved, and the amount of memory needed for
|
||||
compression and decompression. The flags \-1 through \-9
|
||||
specify the block size to be 100,000 bytes through 900,000 bytes (the
|
||||
default) respectively. At decompression time, the block size used for
|
||||
compression is read from the header of the compressed file, and
|
||||
.I bunzip2
|
||||
then allocates itself just enough memory to decompress
|
||||
the file. Since block sizes are stored in compressed files, it follows
|
||||
that the flags \-1 to \-9 are irrelevant to and so ignored
|
||||
during decompression.
|
||||
|
||||
Compression and decompression requirements,
|
||||
in bytes, can be estimated as:
|
||||
|
||||
Compression: 400k + ( 8 x block size )
|
||||
|
||||
Decompression: 100k + ( 4 x block size ), or
|
||||
100k + ( 2.5 x block size )
|
||||
|
||||
Larger block sizes give rapidly diminishing marginal returns. Most of
|
||||
the compression comes from the first two or three hundred k of block
|
||||
size, a fact worth bearing in mind when using
|
||||
.I bzip2
|
||||
on small machines.
|
||||
It is also important to appreciate that the decompression memory
|
||||
requirement is set at compression time by the choice of block size.
|
||||
|
||||
For files compressed with the default 900k block size,
|
||||
.I bunzip2
|
||||
will require about 3700 kbytes to decompress. To support decompression
|
||||
of any file on a 4 megabyte machine,
|
||||
.I bunzip2
|
||||
has an option to
|
||||
decompress using approximately half this amount of memory, about 2300
|
||||
kbytes. Decompression speed is also halved, so you should use this
|
||||
option only where necessary. The relevant flag is -s.
|
||||
|
||||
In general, try and use the largest block size memory constraints allow,
|
||||
since that maximises the compression achieved. Compression and
|
||||
decompression speed are virtually unaffected by block size.
|
||||
|
||||
Another significant point applies to files which fit in a single block
|
||||
-- that means most files you'd encounter using a large block size. The
|
||||
amount of real memory touched is proportional to the size of the file,
|
||||
since the file is smaller than a block. For example, compressing a file
|
||||
20,000 bytes long with the flag -9 will cause the compressor to
|
||||
allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560
|
||||
kbytes of it. Similarly, the decompressor will allocate 3700k but only
|
||||
touch 100k + 20000 * 4 = 180 kbytes.
|
||||
|
||||
Here is a table which summarises the maximum memory usage for different
|
||||
block sizes. Also recorded is the total compressed size for 14 files of
|
||||
the Calgary Text Compression Corpus totalling 3,141,622 bytes. This
|
||||
column gives some feel for how compression varies with block size.
|
||||
These figures tend to understate the advantage of larger block sizes for
|
||||
larger files, since the Corpus is dominated by smaller files.
|
||||
|
||||
Compress Decompress Decompress Corpus
|
||||
Flag usage usage -s usage Size
|
||||
|
||||
-1 1200k 500k 350k 914704
|
||||
-2 2000k 900k 600k 877703
|
||||
-3 2800k 1300k 850k 860338
|
||||
-4 3600k 1700k 1100k 846899
|
||||
-5 4400k 2100k 1350k 845160
|
||||
-6 5200k 2500k 1600k 838626
|
||||
-7 6100k 2900k 1850k 834096
|
||||
-8 6800k 3300k 2100k 828642
|
||||
-9 7600k 3700k 2350k 828642
|
||||
|
||||
.SH RECOVERING DATA FROM DAMAGED FILES
|
||||
.I bzip2
|
||||
compresses files in blocks, usually 900kbytes long. Each
|
||||
block is handled independently. If a media or transmission error causes
|
||||
a multi-block .bz2
|
||||
file to become damaged, it may be possible to
|
||||
recover data from the undamaged blocks in the file.
|
||||
|
||||
The compressed representation of each block is delimited by a 48-bit
|
||||
pattern, which makes it possible to find the block boundaries with
|
||||
reasonable certainty. Each block also carries its own 32-bit CRC, so
|
||||
damaged blocks can be distinguished from undamaged ones.
|
||||
|
||||
.I bzip2recover
|
||||
is a simple program whose purpose is to search for
|
||||
blocks in .bz2 files, and write each block out into its own .bz2
|
||||
file. You can then use
|
||||
.I bzip2
|
||||
\-t
|
||||
to test the
|
||||
integrity of the resulting files, and decompress those which are
|
||||
undamaged.
|
||||
|
||||
.I bzip2recover
|
||||
takes a single argument, the name of the damaged file,
|
||||
and writes a number of files "rec00001file.bz2",
|
||||
"rec00002file.bz2", etc, containing the extracted blocks.
|
||||
The output filenames are designed so that the use of
|
||||
wildcards in subsequent processing -- for example,
|
||||
"bzip2 -dc rec*file.bz2 > recovered_data" -- processes the files in
|
||||
the correct order.
|
||||
|
||||
.I bzip2recover
|
||||
should be of most use dealing with large .bz2
|
||||
files, as these will contain many blocks. It is clearly
|
||||
futile to use it on damaged single-block files, since a
|
||||
damaged block cannot be recovered. If you wish to minimise
|
||||
any potential data loss through media or transmission errors,
|
||||
you might consider compressing with a smaller
|
||||
block size.
|
||||
|
||||
.SH PERFORMANCE NOTES
|
||||
The sorting phase of compression gathers together similar strings in the
|
||||
file. Because of this, files containing very long runs of repeated
|
||||
symbols, like "aabaabaabaab ..." (repeated several hundred times) may
|
||||
compress more slowly than normal. Versions 0.9.5 and above fare much
|
||||
better than previous versions in this respect. The ratio between
|
||||
worst-case and average-case compression time is in the region of 10:1.
|
||||
For previous versions, this figure was more like 100:1. You can use the
|
||||
\-vvvv option to monitor progress in great detail, if you want.
|
||||
|
||||
Decompression speed is unaffected by these phenomena.
|
||||
|
||||
.I bzip2
|
||||
usually allocates several megabytes of memory to operate
|
||||
in, and then charges all over it in a fairly random fashion. This means
|
||||
that performance, both for compressing and decompressing, is largely
|
||||
determined by the speed at which your machine can service cache misses.
|
||||
Because of this, small changes to the code to reduce the miss rate have
|
||||
been observed to give disproportionately large performance improvements.
|
||||
I imagine
|
||||
.I bzip2
|
||||
will perform best on machines with very large caches.
|
||||
|
||||
.SH CAVEATS
|
||||
I/O error messages are not as helpful as they could be.
|
||||
.I bzip2
|
||||
tries hard to detect I/O errors and exit cleanly, but the details of
|
||||
what the problem is sometimes seem rather misleading.
|
||||
|
||||
This manual page pertains to version 1.0.2 of
|
||||
.I bzip2.
|
||||
Compressed data created by this version is entirely forwards and
|
||||
backwards compatible with the previous public releases, versions
|
||||
0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, but with the following
|
||||
exception: 0.9.0 and above can correctly decompress multiple
|
||||
concatenated compressed files. 0.1pl2 cannot do this; it will stop
|
||||
after decompressing just the first file in the stream.
|
||||
|
||||
.I bzip2recover
|
||||
versions prior to this one, 1.0.2, used 32-bit integers to represent
|
||||
bit positions in compressed files, so they could not handle compressed
|
||||
files more than 512 megabytes long. Versions 1.0.2 and above use
|
||||
64-bit ints on some platforms which support them (GNU supported
|
||||
targets, and Windows). To establish whether or not bzip2recover was
|
||||
built with such a limitation, run it without arguments. In any event
|
||||
you can build yourself an unlimited version if you can recompile it
|
||||
with MaybeUInt64 set to be an unsigned 64-bit integer.
|
||||
|
||||
|
||||
|
||||
.SH AUTHOR
|
||||
Julian Seward, jseward@acm.org.
|
||||
|
||||
http://sources.redhat.com/bzip2
|
||||
|
||||
The ideas embodied in
|
||||
.I bzip2
|
||||
are due to (at least) the following
|
||||
people: Michael Burrows and David Wheeler (for the block sorting
|
||||
transformation), David Wheeler (again, for the Huffman coder), Peter
|
||||
Fenwick (for the structured coding model in the original
|
||||
.I bzip,
|
||||
and many refinements), and Alistair Moffat, Radford Neal and Ian Witten
|
||||
(for the arithmetic coder in the original
|
||||
.I bzip).
|
||||
I am much
|
||||
indebted for their help, support and advice. See the manual in the
|
||||
source distribution for pointers to sources of documentation. Christian
|
||||
von Roques encouraged me to look for faster sorting algorithms, so as to
|
||||
speed up compression. Bela Lubkin encouraged me to improve the
|
||||
worst-case compression performance.
|
||||
The bz* scripts are derived from those of GNU gzip.
|
||||
Many people sent patches, helped
|
||||
with portability problems, lent machines, gave advice and were generally
|
||||
helpful.
|
|
@ -1,398 +0,0 @@
|
|||
bzip2(1) bzip2(1)
|
||||
|
||||
|
||||
|
||||
NNAAMMEE
|
||||
bzip2, bunzip2 - a block-sorting file compressor, v1.0.2
|
||||
bzcat - decompresses files to stdout
|
||||
bzip2recover - recovers data from damaged bzip2 files
|
||||
|
||||
|
||||
SSYYNNOOPPSSIISS
|
||||
bbzziipp22 [ --ccddffkkqqssttvvzzVVLL112233445566778899 ] [ _f_i_l_e_n_a_m_e_s _._._. ]
|
||||
bbuunnzziipp22 [ --ffkkvvssVVLL ] [ _f_i_l_e_n_a_m_e_s _._._. ]
|
||||
bbzzccaatt [ --ss ] [ _f_i_l_e_n_a_m_e_s _._._. ]
|
||||
bbzziipp22rreeccoovveerr _f_i_l_e_n_a_m_e
|
||||
|
||||
|
||||
DDEESSCCRRIIPPTTIIOONN
|
||||
_b_z_i_p_2 compresses files using the Burrows-Wheeler block
|
||||
sorting text compression algorithm, and Huffman coding.
|
||||
Compression is generally considerably better than that
|
||||
achieved by more conventional LZ77/LZ78-based compressors,
|
||||
and approaches the performance of the PPM family of sta
|
||||
tistical compressors.
|
||||
|
||||
The command-line options are deliberately very similar to
|
||||
those of _G_N_U _g_z_i_p_, but they are not identical.
|
||||
|
||||
_b_z_i_p_2 expects a list of file names to accompany the com
|
||||
mand-line flags. Each file is replaced by a compressed
|
||||
version of itself, with the name "original_name.bz2".
|
||||
Each compressed file has the same modification date, per
|
||||
missions, and, when possible, ownership as the correspond
|
||||
ing original, so that these properties can be correctly
|
||||
restored at decompression time. File name handling is
|
||||
naive in the sense that there is no mechanism for preserv
|
||||
ing original file names, permissions, ownerships or dates
|
||||
in filesystems which lack these concepts, or have serious
|
||||
file name length restrictions, such as MS-DOS.
|
||||
|
||||
_b_z_i_p_2 and _b_u_n_z_i_p_2 will by default not overwrite existing
|
||||
files. If you want this to happen, specify the -f flag.
|
||||
|
||||
If no file names are specified, _b_z_i_p_2 compresses from
|
||||
standard input to standard output. In this case, _b_z_i_p_2
|
||||
will decline to write compressed output to a terminal, as
|
||||
this would be entirely incomprehensible and therefore
|
||||
pointless.
|
||||
|
||||
_b_u_n_z_i_p_2 (or _b_z_i_p_2 _-_d_) decompresses all specified files.
|
||||
Files which were not created by _b_z_i_p_2 will be detected and
|
||||
ignored, and a warning issued. _b_z_i_p_2 attempts to guess
|
||||
the filename for the decompressed file from that of the
|
||||
compressed file as follows:
|
||||
|
||||
filename.bz2 becomes filename
|
||||
filename.bz becomes filename
|
||||
filename.tbz2 becomes filename.tar
|
||||
filename.tbz becomes filename.tar
|
||||
anyothername becomes anyothername.out
|
||||
|
||||
If the file does not end in one of the recognised endings,
|
||||
_._b_z_2_, _._b_z_, _._t_b_z_2 or _._t_b_z_, _b_z_i_p_2 complains that it cannot
|
||||
guess the name of the original file, and uses the original
|
||||
name with _._o_u_t appended.
|
||||
|
||||
As with compression, supplying no filenames causes decom
|
||||
pression from standard input to standard output.
|
||||
|
||||
_b_u_n_z_i_p_2 will correctly decompress a file which is the con
|
||||
catenation of two or more compressed files. The result is
|
||||
the concatenation of the corresponding uncompressed files.
|
||||
Integrity testing (-t) of concatenated compressed files is
|
||||
also supported.
|
||||
|
||||
You can also compress or decompress files to the standard
|
||||
output by giving the -c flag. Multiple files may be com
|
||||
pressed and decompressed like this. The resulting outputs
|
||||
are fed sequentially to stdout. Compression of multiple
|
||||
files in this manner generates a stream containing multi
|
||||
ple compressed file representations. Such a stream can be
|
||||
decompressed correctly only by _b_z_i_p_2 version 0.9.0 or
|
||||
later. Earlier versions of _b_z_i_p_2 will stop after decom
|
||||
pressing the first file in the stream.
|
||||
|
||||
_b_z_c_a_t (or _b_z_i_p_2 _-_d_c_) decompresses all specified files to
|
||||
the standard output.
|
||||
|
||||
_b_z_i_p_2 will read arguments from the environment variables
|
||||
_B_Z_I_P_2 and _B_Z_I_P_, in that order, and will process them
|
||||
before any arguments read from the command line. This
|
||||
gives a convenient way to supply default arguments.
|
||||
|
||||
Compression is always performed, even if the compressed
|
||||
file is slightly larger than the original. Files of less
|
||||
than about one hundred bytes tend to get larger, since the
|
||||
compression mechanism has a constant overhead in the
|
||||
region of 50 bytes. Random data (including the output of
|
||||
most file compressors) is coded at about 8.05 bits per
|
||||
byte, giving an expansion of around 0.5%.
|
||||
|
||||
As a self-check for your protection, _b_z_i_p_2 uses 32-bit
|
||||
CRCs to make sure that the decompressed version of a file
|
||||
is identical to the original. This guards against corrup
|
||||
tion of the compressed data, and against undetected bugs
|
||||
in _b_z_i_p_2 (hopefully very unlikely). The chance of data
|
||||
corruption going undetected is microscopic, about one
|
||||
chance in four billion for each file processed. Be aware,
|
||||
though, that the check occurs upon decompression, so it
|
||||
can only tell you that something is wrong. It can't help
|
||||
you recover the original uncompressed data. You can use
|
||||
_b_z_i_p_2_r_e_c_o_v_e_r to try to recover data from damaged files.
|
||||
|
||||
Return values: 0 for a normal exit, 1 for environmental
|
||||
problems (file not found, invalid flags, I/O errors, &c),
|
||||
2 to indicate a corrupt compressed file, 3 for an internal
|
||||
consistency error (eg, bug) which caused _b_z_i_p_2 to panic.
|
||||
|
||||
|
||||
OOPPTTIIOONNSS
|
||||
--cc ----ssttddoouutt
|
||||
Compress or decompress to standard output.
|
||||
|
||||
--dd ----ddeeccoommpprreessss
|
||||
Force decompression. _b_z_i_p_2_, _b_u_n_z_i_p_2 and _b_z_c_a_t are
|
||||
really the same program, and the decision about
|
||||
what actions to take is done on the basis of which
|
||||
name is used. This flag overrides that mechanism,
|
||||
and forces _b_z_i_p_2 to decompress.
|
||||
|
||||
--zz ----ccoommpprreessss
|
||||
The complement to -d: forces compression,
|
||||
regardless of the invocation name.
|
||||
|
||||
--tt ----tteesstt
|
||||
Check integrity of the specified file(s), but don't
|
||||
decompress them. This really performs a trial
|
||||
decompression and throws away the result.
|
||||
|
||||
--ff ----ffoorrccee
|
||||
Force overwrite of output files. Normally, _b_z_i_p_2
|
||||
will not overwrite existing output files. Also
|
||||
forces _b_z_i_p_2 to break hard links to files, which it
|
||||
otherwise wouldn't do.
|
||||
|
||||
bzip2 normally declines to decompress files which
|
||||
don't have the correct magic header bytes. If
|
||||
forced (-f), however, it will pass such files
|
||||
through unmodified. This is how GNU gzip behaves.
|
||||
|
||||
--kk ----kkeeeepp
|
||||
Keep (don't delete) input files during compression
|
||||
or decompression.
|
||||
|
||||
--ss ----ssmmaallll
|
||||
Reduce memory usage, for compression, decompression
|
||||
and testing. Files are decompressed and tested
|
||||
using a modified algorithm which only requires 2.5
|
||||
bytes per block byte. This means any file can be
|
||||
decompressed in 2300k of memory, albeit at about
|
||||
half the normal speed.
|
||||
|
||||
During compression, -s selects a block size of
|
||||
200k, which limits memory use to around the same
|
||||
figure, at the expense of your compression ratio.
|
||||
In short, if your machine is low on memory (8
|
||||
megabytes or less), use -s for everything. See
|
||||
MEMORY MANAGEMENT below.
|
||||
|
||||
--qq ----qquuiieett
|
||||
Suppress non-essential warning messages. Messages
|
||||
pertaining to I/O errors and other critical events
|
||||
will not be suppressed.
|
||||
|
||||
--vv ----vveerrbboossee
|
||||
Verbose mode -- show the compression ratio for each
|
||||
file processed. Further -v's increase the ver
|
||||
bosity level, spewing out lots of information which
|
||||
is primarily of interest for diagnostic purposes.
|
||||
|
||||
--LL ----lliicceennssee --VV ----vveerrssiioonn
|
||||
Display the software version, license terms and
|
||||
conditions.
|
||||
|
||||
--11 ((oorr ----ffaasstt)) ttoo --99 ((oorr ----bbeesstt))
|
||||
Set the block size to 100 k, 200 k .. 900 k when
|
||||
compressing. Has no effect when decompressing.
|
||||
See MEMORY MANAGEMENT below. The --fast and --best
|
||||
aliases are primarily for GNU gzip compatibility.
|
||||
In particular, --fast doesn't make things signifi
|
||||
cantly faster. And --best merely selects the
|
||||
default behaviour.
|
||||
|
||||
---- Treats all subsequent arguments as file names, even
|
||||
if they start with a dash. This is so you can han
|
||||
dle files with names beginning with a dash, for
|
||||
example: bzip2 -- -myfilename.
|
||||
|
||||
----rreeppeettiittiivvee--ffaasstt ----rreeppeettiittiivvee--bbeesstt
|
||||
These flags are redundant in versions 0.9.5 and
|
||||
above. They provided some coarse control over the
|
||||
behaviour of the sorting algorithm in earlier ver
|
||||
sions, which was sometimes useful. 0.9.5 and above
|
||||
have an improved algorithm which renders these
|
||||
flags irrelevant.
|
||||
|
||||
|
||||
MMEEMMOORRYY MMAANNAAGGEEMMEENNTT
|
||||
_b_z_i_p_2 compresses large files in blocks. The block size
|
||||
affects both the compression ratio achieved, and the
|
||||
amount of memory needed for compression and decompression.
|
||||
The flags -1 through -9 specify the block size to be
|
||||
100,000 bytes through 900,000 bytes (the default) respec
|
||||
tively. At decompression time, the block size used for
|
||||
compression is read from the header of the compressed
|
||||
file, and _b_u_n_z_i_p_2 then allocates itself just enough memory
|
||||
to decompress the file. Since block sizes are stored in
|
||||
compressed files, it follows that the flags -1 to -9 are
|
||||
irrelevant to and so ignored during decompression.
|
||||
|
||||
Compression and decompression requirements, in bytes, can
|
||||
be estimated as:
|
||||
|
||||
Compression: 400k + ( 8 x block size )
|
||||
|
||||
Decompression: 100k + ( 4 x block size ), or
|
||||
100k + ( 2.5 x block size )
|
||||
|
||||
Larger block sizes give rapidly diminishing marginal
|
||||
returns. Most of the compression comes from the first two
|
||||
or three hundred k of block size, a fact worth bearing in
|
||||
mind when using _b_z_i_p_2 on small machines. It is also
|
||||
important to appreciate that the decompression memory
|
||||
requirement is set at compression time by the choice of
|
||||
block size.
|
||||
|
||||
For files compressed with the default 900k block size,
|
||||
_b_u_n_z_i_p_2 will require about 3700 kbytes to decompress. To
|
||||
support decompression of any file on a 4 megabyte machine,
|
||||
_b_u_n_z_i_p_2 has an option to decompress using approximately
|
||||
half this amount of memory, about 2300 kbytes. Decompres
|
||||
sion speed is also halved, so you should use this option
|
||||
only where necessary. The relevant flag is -s.
|
||||
|
||||
In general, try and use the largest block size memory con
|
||||
straints allow, since that maximises the compression
|
||||
achieved. Compression and decompression speed are virtu
|
||||
ally unaffected by block size.
|
||||
|
||||
Another significant point applies to files which fit in a
|
||||
single block -- that means most files you'd encounter
|
||||
using a large block size. The amount of real memory
|
||||
touched is proportional to the size of the file, since the
|
||||
file is smaller than a block. For example, compressing a
|
||||
file 20,000 bytes long with the flag -9 will cause the
|
||||
compressor to allocate around 7600k of memory, but only
|
||||
touch 400k + 20000 * 8 = 560 kbytes of it. Similarly, the
|
||||
decompressor will allocate 3700k but only touch 100k +
|
||||
20000 * 4 = 180 kbytes.
|
||||
|
||||
Here is a table which summarises the maximum memory usage
|
||||
for different block sizes. Also recorded is the total
|
||||
compressed size for 14 files of the Calgary Text Compres
|
||||
sion Corpus totalling 3,141,622 bytes. This column gives
|
||||
some feel for how compression varies with block size.
|
||||
These figures tend to understate the advantage of larger
|
||||
block sizes for larger files, since the Corpus is domi
|
||||
nated by smaller files.
|
||||
|
||||
Compress Decompress Decompress Corpus
|
||||
Flag usage usage -s usage Size
|
||||
|
||||
-1 1200k 500k 350k 914704
|
||||
-2 2000k 900k 600k 877703
|
||||
-3 2800k 1300k 850k 860338
|
||||
-4 3600k 1700k 1100k 846899
|
||||
-5 4400k 2100k 1350k 845160
|
||||
-6 5200k 2500k 1600k 838626
|
||||
-7 6000k 2900k 1850k 834096
|
||||
-8 6800k 3300k 2100k 828642
|
||||
-9 7600k 3700k 2350k 828642
|
||||
|
||||
|
||||
RREECCOOVVEERRIINNGG DDAATTAA FFRROOMM DDAAMMAAGGEEDD FFIILLEESS
|
||||
_b_z_i_p_2 compresses files in blocks, usually 900kbytes long.
|
||||
Each block is handled independently. If a media or trans
|
||||
mission error causes a multi-block .bz2 file to become
|
||||
damaged, it may be possible to recover data from the
|
||||
undamaged blocks in the file.
|
||||
|
||||
The compressed representation of each block is delimited
|
||||
by a 48-bit pattern, which makes it possible to find the
|
||||
block boundaries with reasonable certainty. Each block
|
||||
also carries its own 32-bit CRC, so damaged blocks can be
|
||||
distinguished from undamaged ones.
|
||||
|
||||
_b_z_i_p_2_r_e_c_o_v_e_r is a simple program whose purpose is to
|
||||
search for blocks in .bz2 files, and write each block out
|
||||
into its own .bz2 file. You can then use _b_z_i_p_2 -t to test
|
||||
the integrity of the resulting files, and decompress those
|
||||
which are undamaged.
|
||||
|
||||
_b_z_i_p_2_r_e_c_o_v_e_r takes a single argument, the name of the dam
|
||||
aged file, and writes a number of files
|
||||
"rec00001file.bz2", "rec00002file.bz2", etc, containing
|
||||
the extracted blocks. The output filenames are
|
||||
designed so that the use of wildcards in subsequent pro
|
||||
cessing -- for example, "bzip2 -dc rec*file.bz2 > recov
|
||||
ered_data" -- processes the files in the correct order.
|
||||
|
||||
_b_z_i_p_2_r_e_c_o_v_e_r should be of most use dealing with large .bz2
|
||||
files, as these will contain many blocks. It is clearly
|
||||
futile to use it on damaged single-block files, since a
|
||||
damaged block cannot be recovered. If you wish to min
|
||||
imise any potential data loss through media or transmis
|
||||
sion errors, you might consider compressing with a smaller
|
||||
block size.
|
||||
|
||||
|
||||
PPEERRFFOORRMMAANNCCEE NNOOTTEESS
|
||||
The sorting phase of compression gathers together similar
|
||||
strings in the file. Because of this, files containing
|
||||
very long runs of repeated symbols, like "aabaabaabaab
|
||||
..." (repeated several hundred times) may compress more
|
||||
slowly than normal. Versions 0.9.5 and above fare much
|
||||
better than previous versions in this respect. The ratio
|
||||
between worst-case and average-case compression time is in
|
||||
the region of 10:1. For previous versions, this figure
|
||||
was more like 100:1. You can use the -vvvv option to mon
|
||||
itor progress in great detail, if you want.
|
||||
|
||||
Decompression speed is unaffected by these phenomena.
|
||||
|
||||
_b_z_i_p_2 usually allocates several megabytes of memory to
|
||||
operate in, and then charges all over it in a fairly ran
|
||||
dom fashion. This means that performance, both for com
|
||||
pressing and decompressing, is largely determined by the
|
||||
speed at which your machine can service cache misses.
|
||||
Because of this, small changes to the code to reduce the
|
||||
miss rate have been observed to give disproportionately
|
||||
large performance improvements. I imagine _b_z_i_p_2 will per
|
||||
form best on machines with very large caches.
|
||||
|
||||
|
||||
CCAAVVEEAATTSS
|
||||
I/O error messages are not as helpful as they could be.
|
||||
_b_z_i_p_2 tries hard to detect I/O errors and exit cleanly,
|
||||
but the details of what the problem is sometimes seem
|
||||
rather misleading.
|
||||
|
||||
This manual page pertains to version 1.0.2 of _b_z_i_p_2_. Com
|
||||
pressed data created by this version is entirely forwards
|
||||
and backwards compatible with the previous public
|
||||
releases, versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1,
|
||||
but with the following exception: 0.9.0 and above can cor
|
||||
rectly decompress multiple concatenated compressed files.
|
||||
0.1pl2 cannot do this; it will stop after decompressing
|
||||
just the first file in the stream.
|
||||
|
||||
_b_z_i_p_2_r_e_c_o_v_e_r versions prior to this one, 1.0.2, used
|
||||
32-bit integers to represent bit positions in compressed
|
||||
files, so they could not handle compressed files more than
|
||||
512 megabytes long. Versions 1.0.2 and above use 64-bit
|
||||
ints on some platforms which support them (GNU supported
|
||||
targets, and Windows). To establish whether or not
|
||||
bzip2recover was built with such a limitation, run it
|
||||
without arguments. In any event you can build yourself an
|
||||
unlimited version if you can recompile it with MaybeUInt64
|
||||
set to be an unsigned 64-bit integer.
|
||||
|
||||
|
||||
|
||||
|
||||
AAUUTTHHOORR
|
||||
Julian Seward, jseward@acm.org.
|
||||
|
||||
http://sources.redhat.com/bzip2
|
||||
|
||||
The ideas embodied in _b_z_i_p_2 are due to (at least) the fol
|
||||
lowing people: Michael Burrows and David Wheeler (for the
|
||||
block sorting transformation), David Wheeler (again, for
|
||||
the Huffman coder), Peter Fenwick (for the structured cod
|
||||
ing model in the original _b_z_i_p_, and many refinements), and
|
||||
Alistair Moffat, Radford Neal and Ian Witten (for the
|
||||
arithmetic coder in the original _b_z_i_p_)_. I am much
|
||||
indebted for their help, support and advice. See the man
|
||||
ual in the source distribution for pointers to sources of
|
||||
documentation. Christian von Roques encouraged me to look
|
||||
for faster sorting algorithms, so as to speed up compres
|
||||
sion. Bela Lubkin encouraged me to improve the worst-case
|
||||
compression performance. The bz* scripts are derived from
|
||||
those of GNU gzip. Many people sent patches, helped with
|
||||
portability problems, lent machines, gave advice and were
|
||||
generally helpful.
|
||||
|
||||
|
||||
|
||||
bzip2(1)
|
213
bzip2.c
213
bzip2.c
|
@ -3,6 +3,11 @@
|
|||
/*--- A block-sorting, lossless compressor bzip2.c ---*/
|
||||
/*-----------------------------------------------------------*/
|
||||
|
||||
/*-- Modified for use under GNO by Stephen Heumann --*/
|
||||
#ifdef __ORCAC__
|
||||
segment "bzip2";
|
||||
#endif
|
||||
|
||||
/*--
|
||||
This file is a part of bzip2 and/or libbzip2, a program and
|
||||
library for lossless, block-sorting data compression.
|
||||
|
@ -148,6 +153,14 @@
|
|||
#include <ctype.h>
|
||||
#include "bzlib.h"
|
||||
|
||||
#ifdef __appleiigs__
|
||||
#include <gsos.h>
|
||||
char *stristr(const char *, const char *);
|
||||
#if defined(__GNO__) && defined(__STACK_CHECK__)
|
||||
#include <gno/gno.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define ERROR_IF_EOF(i) { if ((i) == EOF) ioError(); }
|
||||
#define ERROR_IF_NOT_ZERO(i) { if ((i) != 0) ioError(); }
|
||||
#define ERROR_IF_MINUS_ONE(i) { if ((i) == (-1)) ioError(); }
|
||||
|
@ -213,6 +226,11 @@
|
|||
ERROR_IF_MINUS_ONE ( retVal ); \
|
||||
} while ( 0 )
|
||||
# endif
|
||||
|
||||
# ifdef __GNO__
|
||||
# undef SET_BINARY_MODE
|
||||
# define SET_BINARY_MODE(fd) fsetbinary(fd);
|
||||
# endif
|
||||
#endif /* BZ_UNIX */
|
||||
|
||||
|
||||
|
@ -220,7 +238,10 @@
|
|||
#if BZ_LCCWIN32
|
||||
# include <io.h>
|
||||
# include <fcntl.h>
|
||||
# include <sys\stat.h>
|
||||
/* This was "#include <sys\stat.h>", but ORCA/C complains
|
||||
about an invalid character, so I changed it. This
|
||||
might possibly break compilation on Win 32 systems. */
|
||||
# include <sys/stat.h>
|
||||
|
||||
# define NORETURN /**/
|
||||
# define PATH_SEP '\\'
|
||||
|
@ -253,8 +274,15 @@
|
|||
typedef char Char;
|
||||
typedef unsigned char Bool;
|
||||
typedef unsigned char UChar;
|
||||
#ifdef __ORCAC__
|
||||
typedef long Int32;
|
||||
typedef unsigned long UInt32;
|
||||
# define Int32_FMT "%ld"
|
||||
#else
|
||||
typedef int Int32;
|
||||
typedef unsigned int UInt32;
|
||||
# define Int32_FMT "%d"
|
||||
#endif /* defined __ORCAC__ */
|
||||
typedef short Int16;
|
||||
typedef unsigned short UInt16;
|
||||
|
||||
|
@ -386,7 +414,11 @@ static
|
|||
void uInt64_toAscii ( char* outbuf, UInt64* n )
|
||||
{
|
||||
Int32 i, q;
|
||||
#ifdef __ORCAC__
|
||||
static UChar buf[32];
|
||||
#else
|
||||
UChar buf[32];
|
||||
#endif
|
||||
Int32 nBuf = 0;
|
||||
UInt64 n_copy = *n;
|
||||
do {
|
||||
|
@ -416,15 +448,24 @@ Bool myfeof ( FILE* f )
|
|||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
#ifndef __ORCAC__
|
||||
static
|
||||
void compressStream ( FILE *stream, FILE *zStream )
|
||||
{
|
||||
BZFILE* bzf = NULL;
|
||||
#ifdef __ORCAC__
|
||||
static UChar ibuf[5000];
|
||||
#else
|
||||
UChar ibuf[5000];
|
||||
#endif
|
||||
Int32 nIbuf;
|
||||
UInt32 nbytes_in_lo32, nbytes_in_hi32;
|
||||
UInt32 nbytes_out_lo32, nbytes_out_hi32;
|
||||
#ifdef __ORCAC__
|
||||
Int16 bzerr, bzerr_dummy, ret;
|
||||
#else
|
||||
Int32 bzerr, bzerr_dummy, ret;
|
||||
#endif
|
||||
|
||||
SET_BINARY_MODE(stream);
|
||||
SET_BINARY_MODE(zStream);
|
||||
|
@ -513,6 +554,7 @@ void compressStream ( FILE *stream, FILE *zStream )
|
|||
panic ( "compress:end" );
|
||||
/*notreached*/
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
@ -521,10 +563,18 @@ static
|
|||
Bool uncompressStream ( FILE *zStream, FILE *stream )
|
||||
{
|
||||
BZFILE* bzf = NULL;
|
||||
#ifdef __ORCAC__
|
||||
Int16 bzerr, bzerr_dummy;
|
||||
Int32 ret, nread, streamNo, i;
|
||||
static UChar obuf[5000];
|
||||
static UChar unused[BZ_MAX_UNUSED];
|
||||
Int16 nUnused;
|
||||
#else
|
||||
Int32 bzerr, bzerr_dummy, ret, nread, streamNo, i;
|
||||
UChar obuf[5000];
|
||||
UChar unused[BZ_MAX_UNUSED];
|
||||
Int32 nUnused;
|
||||
#endif
|
||||
UChar* unusedTmp;
|
||||
|
||||
nUnused = 0;
|
||||
|
@ -635,10 +685,18 @@ static
|
|||
Bool testStream ( FILE *zStream )
|
||||
{
|
||||
BZFILE* bzf = NULL;
|
||||
#ifdef __ORCAC__
|
||||
Int16 bzerr, bzerr_dummy, ret;
|
||||
Int32 nread, streamNo, i;
|
||||
static UChar obuf[5000];
|
||||
static UChar unused[BZ_MAX_UNUSED];
|
||||
Int16 nUnused;
|
||||
#else
|
||||
Int32 bzerr, bzerr_dummy, ret, nread, streamNo, i;
|
||||
UChar obuf[5000];
|
||||
UChar unused[BZ_MAX_UNUSED];
|
||||
Int32 nUnused;
|
||||
#endif
|
||||
UChar* unusedTmp;
|
||||
|
||||
nUnused = 0;
|
||||
|
@ -802,7 +860,11 @@ void cleanUpAndFail ( Int32 ec )
|
|||
"%s: `%s' may be incomplete.\n",
|
||||
progName, outName );
|
||||
fprintf ( stderr,
|
||||
#ifndef __GNO__
|
||||
"%s: I suggest doing an integrity test (bzip2 -tv)"
|
||||
#else
|
||||
"%s: I suggest doing an integrity test (bunzip2 -tv)"
|
||||
#endif
|
||||
" of it.\n",
|
||||
progName );
|
||||
}
|
||||
|
@ -811,7 +873,7 @@ void cleanUpAndFail ( Int32 ec )
|
|||
if (noisy && numFileNames > 0 && numFilesProcessed < numFileNames) {
|
||||
fprintf ( stderr,
|
||||
"%s: WARNING: some files have not been processed:\n"
|
||||
"%s: %d specified on command line, %d not processed yet.\n\n",
|
||||
"%s: " Int32_FMT " specified on command line, " Int32_FMT " not processed yet.\n\n",
|
||||
progName, progName,
|
||||
numFileNames, numFileNames - numFilesProcessed );
|
||||
}
|
||||
|
@ -827,8 +889,16 @@ void panic ( Char* s )
|
|||
fprintf ( stderr,
|
||||
"\n%s: PANIC -- internal consistency error:\n"
|
||||
"\t%s\n"
|
||||
#ifndef __GNO__
|
||||
"\tThis is a BUG. Please report it to me at:\n"
|
||||
"\tjseward@acm.org\n",
|
||||
#else
|
||||
"\tThis is a BUG. If you are experiencing it only in\n"
|
||||
"the GNO version of bunzip2, please report it to me at\n"
|
||||
"sheumann@myrealbox.com . If you can duplicate it in\n"
|
||||
"other versions of bzip2 as well, please report it to\n"
|
||||
"the original author Julian Seward at jseward@acm.org\n",
|
||||
#endif
|
||||
progName, s );
|
||||
showFileNames();
|
||||
cleanUpAndFail( 3 );
|
||||
|
@ -880,6 +950,10 @@ void ioError ( void )
|
|||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
#ifdef __ORCAC__
|
||||
#pragma databank 1
|
||||
#endif
|
||||
|
||||
static
|
||||
void mySignalCatcher ( IntNative n )
|
||||
{
|
||||
|
@ -889,11 +963,14 @@ void mySignalCatcher ( IntNative n )
|
|||
cleanUpAndFail(1);
|
||||
}
|
||||
|
||||
|
||||
/* This function should never be called on a normal GNO system,
|
||||
but it doesn't hurt to leave it in. */
|
||||
/*---------------------------------------------*/
|
||||
static
|
||||
void mySIGSEGVorSIGBUScatcher ( IntNative n )
|
||||
{
|
||||
#ifndef __ORCAC__
|
||||
/* Not needed for decompression */
|
||||
if (opMode == OM_Z)
|
||||
fprintf (
|
||||
stderr,
|
||||
|
@ -915,7 +992,10 @@ void mySIGSEGVorSIGBUScatcher ( IntNative n )
|
|||
" have the manual or can't be bothered to read it, mail me anyway.\n"
|
||||
"\n",
|
||||
progName );
|
||||
else
|
||||
else
|
||||
#else
|
||||
if (opMode != OM_Z)
|
||||
#endif
|
||||
fprintf (
|
||||
stderr,
|
||||
"\n%s: Caught a SIGSEGV or SIGBUS whilst decompressing.\n"
|
||||
|
@ -945,6 +1025,10 @@ void mySIGSEGVorSIGBUScatcher ( IntNative n )
|
|||
{ cadvise(); cleanUpAndFail( 2 ); }
|
||||
}
|
||||
|
||||
#ifdef __ORCAC__
|
||||
#pragma databank 0
|
||||
#endif
|
||||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
static
|
||||
|
@ -1000,10 +1084,10 @@ void copyFileName ( Char* to, Char* from )
|
|||
if ( strlen(from) > FILE_NAME_LEN-10 ) {
|
||||
fprintf (
|
||||
stderr,
|
||||
"bzip2: file name\n`%s'\n"
|
||||
"%s: file name\n`%s'\n"
|
||||
"is suspiciously (more than %d chars) long.\n"
|
||||
"Try using a reasonable file name instead. Sorry! :-)\n",
|
||||
from, FILE_NAME_LEN-10
|
||||
progName, from, FILE_NAME_LEN-10
|
||||
);
|
||||
setExit(1);
|
||||
exit(exitValue);
|
||||
|
@ -1137,13 +1221,43 @@ void applySavedMetaInfoToOutputFile ( Char *dstName )
|
|||
retVal = chmod ( dstName, fileMetaInfo.st_mode );
|
||||
ERROR_IF_NOT_ZERO ( retVal );
|
||||
|
||||
#ifndef __ORCAC__
|
||||
/* ORCA/C's localtime(), which is called by utime(), is broken.
|
||||
* We fix this by simply disabling time setting, as bzip2 does
|
||||
* on non-Unix platforms anyway. A better solution would be
|
||||
* to fix or replace utime() and/or localtime().
|
||||
*/
|
||||
retVal = utime ( dstName, &uTimBuf );
|
||||
ERROR_IF_NOT_ZERO ( retVal );
|
||||
#endif
|
||||
|
||||
#ifdef __appleiigs__
|
||||
/* Set filetype to BIN if running on the GS */
|
||||
{
|
||||
static GSString255 fileNameStringGS;
|
||||
static FileInfoRecGS infoRec = { 4, /* pCount */
|
||||
&fileNameStringGS, /* Ptr to file name */
|
||||
0x00C3, /* access restrictions (none) */
|
||||
0x06, /* filetype (BIN) */
|
||||
0x0000 /* auxtype ($0000) */
|
||||
};
|
||||
|
||||
if (strlen( dstName ) <= 255) {
|
||||
strncpy( fileNameStringGS.text, dstName, 255 );
|
||||
fileNameStringGS.length = strlen( dstName );
|
||||
SetFileInfo( &infoRec );
|
||||
/* Ignore any errors produced by this call, leaving the file's
|
||||
existing filetype intact. This parallels the approach taken
|
||||
when setting file attributes on Unix.
|
||||
*/
|
||||
}
|
||||
}
|
||||
#else
|
||||
retVal = chown ( dstName, fileMetaInfo.st_uid, fileMetaInfo.st_gid );
|
||||
/* chown() will in many cases return with EPERM, which can
|
||||
be safely ignored.
|
||||
*/
|
||||
#endif /* defined __appleiigs__ */
|
||||
# endif
|
||||
}
|
||||
|
||||
|
@ -1181,8 +1295,14 @@ Bool hasSuffix ( Char* s, Char* suffix )
|
|||
{
|
||||
Int32 ns = strlen(s);
|
||||
Int32 nx = strlen(suffix);
|
||||
if (ns < nx) return False;
|
||||
if (ns < nx) return False;
|
||||
#ifndef __appleiigs__
|
||||
if (strcmp(s + ns - nx, suffix) == 0) return True;
|
||||
#else
|
||||
/* Filenames are case-insensitive on the GS,
|
||||
so use a case-insensitive compare for them */
|
||||
if (strcasecmp(s + ns - nx, suffix) == 0) return True;
|
||||
#endif
|
||||
return False;
|
||||
}
|
||||
|
||||
|
@ -1198,6 +1318,7 @@ Bool mapSuffix ( Char* name,
|
|||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
#ifndef __ORCAC__
|
||||
static
|
||||
void compress ( Char *name )
|
||||
{
|
||||
|
@ -1279,7 +1400,7 @@ void compress ( Char *name )
|
|||
}
|
||||
if ( srcMode == SM_F2F && !forceOverwrite &&
|
||||
(n=countHardLinks ( inName )) > 0) {
|
||||
fprintf ( stderr, "%s: Input file %s has %d other link%s.\n",
|
||||
fprintf ( stderr, "%s: Input file %s has " Int32_FMT " other link%s.\n",
|
||||
progName, inName, n, n > 1 ? "s" : "" );
|
||||
setExit(1);
|
||||
return;
|
||||
|
@ -1376,6 +1497,7 @@ void compress ( Char *name )
|
|||
|
||||
deleteOutputOnInterrupt = False;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
|
@ -1465,7 +1587,7 @@ void uncompress ( Char *name )
|
|||
}
|
||||
if ( srcMode == SM_F2F && !forceOverwrite &&
|
||||
(n=countHardLinks ( inName ) ) > 0) {
|
||||
fprintf ( stderr, "%s: Input file %s has %d other link%s.\n",
|
||||
fprintf ( stderr, "%s: Input file %s has " Int32_FMT " other link%s.\n",
|
||||
progName, inName, n, n > 1 ? "s" : "" );
|
||||
setExit(1);
|
||||
return;
|
||||
|
@ -1671,7 +1793,11 @@ void license ( void )
|
|||
{
|
||||
fprintf ( stderr,
|
||||
|
||||
#ifndef __ORCAC__
|
||||
"bzip2, a block-sorting file compressor. "
|
||||
#else
|
||||
"bunzip2, a block-sorting file decompressor. "
|
||||
#endif
|
||||
"Version %s.\n"
|
||||
" \n"
|
||||
" Copyright (C) 1996-2002 by Julian Seward.\n"
|
||||
|
@ -1684,6 +1810,14 @@ void license ( void )
|
|||
" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
|
||||
" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
|
||||
" LICENSE file for more details.\n"
|
||||
#ifdef __ORCAC__
|
||||
" \n"
|
||||
" This version of bunzip2 for GNO is based on Julian Seward's bzip2\n"
|
||||
" program for other platforms, with modifications by Stephen Heumann.\n"
|
||||
" \n"
|
||||
" This program contains material from the ORCA/C Run-Time Libraries,\n"
|
||||
" copyright 1987-1996 by Byte Works, Inc. Used with permission.\n"
|
||||
#endif
|
||||
" \n",
|
||||
BZ2_bzlibVersion()
|
||||
);
|
||||
|
@ -1696,13 +1830,19 @@ void usage ( Char *fullProgName )
|
|||
{
|
||||
fprintf (
|
||||
stderr,
|
||||
#ifndef __ORCAC__
|
||||
"bzip2, a block-sorting file compressor. "
|
||||
#else
|
||||
"bunzip2, a block-sorting file decompressor. "
|
||||
#endif
|
||||
"Version %s.\n"
|
||||
"\n usage: %s [flags and input files in any order]\n"
|
||||
"\n"
|
||||
" -h --help print this message\n"
|
||||
" -d --decompress force decompression\n"
|
||||
#ifndef __ORCAC__
|
||||
" -z --compress force compression\n"
|
||||
#endif
|
||||
" -k --keep keep (don't delete) input files\n"
|
||||
" -f --force overwrite existing output files\n"
|
||||
" -t --test test compressed file integrity\n"
|
||||
|
@ -1712,17 +1852,33 @@ void usage ( Char *fullProgName )
|
|||
" -L --license display software version & license\n"
|
||||
" -V --version display software version & license\n"
|
||||
" -s --small use less memory (at most 2500k)\n"
|
||||
#ifndef __ORCAC__
|
||||
" -1 .. -9 set block size to 100k .. 900k\n"
|
||||
" --fast alias for -1\n"
|
||||
" --best alias for -9\n"
|
||||
#endif
|
||||
"\n"
|
||||
#ifndef __ORCAC__
|
||||
" If invoked as `bzip2', default action is to compress.\n"
|
||||
" as `bunzip2', default action is to decompress.\n"
|
||||
#else
|
||||
" If invoked as 'bunzip2', default action is to decompress.\n"
|
||||
#endif
|
||||
" as `bzcat', default action is to decompress to stdout.\n"
|
||||
"\n"
|
||||
#ifndef __ORCAC__
|
||||
" If no file names are given, bzip2 compresses or decompresses\n"
|
||||
" from standard input to standard output. You can combine\n"
|
||||
" short flags, so `-v -4' means the same as -v4 or -4v, &c.\n"
|
||||
#else
|
||||
" If no file names are given, bunzip2 decompresses from standard\n"
|
||||
" input to standard output. You can combine short flags, so\n"
|
||||
" `-v -4' means the same as -v4 or -4v, &c.\n"
|
||||
"\n"
|
||||
" This version of bunzip2 for GNO is based on the bzip2 program for\n"
|
||||
" other platforms; however, it has all compression functionality\n"
|
||||
" disabled and will only decompress or test compressed files.\n"
|
||||
#endif
|
||||
# if BZ_UNIX
|
||||
"\n"
|
||||
# endif
|
||||
|
@ -1794,6 +1950,10 @@ Cell *mkCell ( void )
|
|||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
#ifdef __ORCAC__
|
||||
#pragma optimize 119
|
||||
#endif
|
||||
|
||||
static
|
||||
Cell *snocString ( Cell *root, Char *name )
|
||||
{
|
||||
|
@ -1810,6 +1970,10 @@ Cell *snocString ( Cell *root, Char *name )
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef __ORCAC__
|
||||
#pragma optimize -1
|
||||
#endif
|
||||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
static
|
||||
|
@ -1850,6 +2014,11 @@ IntNative main ( IntNative argc, Char *argv[] )
|
|||
Cell *aa;
|
||||
Bool decode;
|
||||
|
||||
#if defined(__GNO__) && defined(__STACK_CHECK__)
|
||||
__REPORT_STACK();
|
||||
fprintf(stderr, "Stack checking on\n");
|
||||
#endif
|
||||
|
||||
/*-- Be really really really paranoid :-) --*/
|
||||
if (sizeof(Int32) != 4 || sizeof(UInt32) != 4 ||
|
||||
sizeof(Int16) != 2 || sizeof(UInt16) != 2 ||
|
||||
|
@ -1920,6 +2089,7 @@ IntNative main ( IntNative argc, Char *argv[] )
|
|||
|
||||
/*-- Determine what to do (compress/uncompress/test/cat). --*/
|
||||
/*-- Note that subsequent flag handling may change this. --*/
|
||||
#ifndef __ORCAC__
|
||||
opMode = OM_Z;
|
||||
|
||||
if ( (strstr ( progName, "unzip" ) != 0) ||
|
||||
|
@ -1934,6 +2104,23 @@ IntNative main ( IntNative argc, Char *argv[] )
|
|||
srcMode = (numFileNames == 0) ? SM_I2O : SM_F2O;
|
||||
}
|
||||
|
||||
#else
|
||||
/* GNO modifications: Decompress by default, and use case-insensitive
|
||||
compares for filenames, in keeping with the normal practice on the GS */
|
||||
opMode = OM_UNZ;
|
||||
|
||||
if (stristr ( progName, "bzip" ) != 0)
|
||||
opMode = OM_Z;
|
||||
|
||||
if (stristr ( progName, "unzip" ) != 0)
|
||||
opMode = OM_UNZ;
|
||||
|
||||
if ( (stristr ( progName, "z2cat" ) != 0) ||
|
||||
(stristr ( progName, "zcat" ) != 0) ) {
|
||||
opMode = OM_UNZ;
|
||||
srcMode = (numFileNames == 0) ? SM_I2O : SM_F2O;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*-- Look at the flags. --*/
|
||||
for (aa = argList; aa != NULL; aa = aa->link) {
|
||||
|
@ -2026,6 +2213,7 @@ IntNative main ( IntNative argc, Char *argv[] )
|
|||
}
|
||||
|
||||
if (opMode == OM_Z) {
|
||||
#ifndef __ORCAC__
|
||||
if (srcMode == SM_I2O) {
|
||||
compress ( NULL );
|
||||
} else {
|
||||
|
@ -2037,6 +2225,13 @@ IntNative main ( IntNative argc, Char *argv[] )
|
|||
compress ( aa->name );
|
||||
}
|
||||
}
|
||||
#else
|
||||
fprintf ( stderr,
|
||||
"%s: Cannot compress data. The GNO version of bunzip2 does\n"
|
||||
"%s: not support compression, only decompression and testing.\n",
|
||||
progName, progName );
|
||||
cleanUpAndFail( 4 );
|
||||
#endif
|
||||
}
|
||||
else
|
||||
|
||||
|
|
390
bzip2.txt
390
bzip2.txt
|
@ -1,390 +0,0 @@
|
|||
|
||||
NAME
|
||||
bzip2, bunzip2 - a block-sorting file compressor, v1.0.2
|
||||
bzcat - decompresses files to stdout
|
||||
bzip2recover - recovers data from damaged bzip2 files
|
||||
|
||||
|
||||
SYNOPSIS
|
||||
bzip2 [ -cdfkqstvzVL123456789 ] [ filenames ... ]
|
||||
bunzip2 [ -fkvsVL ] [ filenames ... ]
|
||||
bzcat [ -s ] [ filenames ... ]
|
||||
bzip2recover filename
|
||||
|
||||
|
||||
DESCRIPTION
|
||||
bzip2 compresses files using the Burrows-Wheeler block
|
||||
sorting text compression algorithm, and Huffman coding.
|
||||
Compression is generally considerably better than that
|
||||
achieved by more conventional LZ77/LZ78-based compressors,
|
||||
and approaches the performance of the PPM family of sta-
|
||||
tistical compressors.
|
||||
|
||||
The command-line options are deliberately very similar to
|
||||
those of GNU gzip, but they are not identical.
|
||||
|
||||
bzip2 expects a list of file names to accompany the com-
|
||||
mand-line flags. Each file is replaced by a compressed
|
||||
version of itself, with the name "original_name.bz2".
|
||||
Each compressed file has the same modification date, per-
|
||||
missions, and, when possible, ownership as the correspond-
|
||||
ing original, so that these properties can be correctly
|
||||
restored at decompression time. File name handling is
|
||||
naive in the sense that there is no mechanism for preserv-
|
||||
ing original file names, permissions, ownerships or dates
|
||||
in filesystems which lack these concepts, or have serious
|
||||
file name length restrictions, such as MS-DOS.
|
||||
|
||||
bzip2 and bunzip2 will by default not overwrite existing
|
||||
files. If you want this to happen, specify the -f flag.
|
||||
|
||||
If no file names are specified, bzip2 compresses from
|
||||
standard input to standard output. In this case, bzip2
|
||||
will decline to write compressed output to a terminal, as
|
||||
this would be entirely incomprehensible and therefore
|
||||
pointless.
|
||||
|
||||
bunzip2 (or bzip2 -d) decompresses all specified files.
|
||||
Files which were not created by bzip2 will be detected and
|
||||
ignored, and a warning issued. bzip2 attempts to guess
|
||||
the filename for the decompressed file from that of the
|
||||
compressed file as follows:
|
||||
|
||||
filename.bz2 becomes filename
|
||||
filename.bz becomes filename
|
||||
filename.tbz2 becomes filename.tar
|
||||
filename.tbz becomes filename.tar
|
||||
anyothername becomes anyothername.out
|
||||
|
||||
If the file does not end in one of the recognised endings,
|
||||
.bz2, .bz, .tbz2 or .tbz, bzip2 complains that it cannot
|
||||
guess the name of the original file, and uses the original
|
||||
name with .out appended.
|
||||
|
||||
As with compression, supplying no filenames causes decom-
|
||||
pression from standard input to standard output.
|
||||
|
||||
bunzip2 will correctly decompress a file which is the con-
|
||||
catenation of two or more compressed files. The result is
|
||||
the concatenation of the corresponding uncompressed files.
|
||||
Integrity testing (-t) of concatenated compressed files is
|
||||
also supported.
|
||||
|
||||
You can also compress or decompress files to the standard
|
||||
output by giving the -c flag. Multiple files may be com-
|
||||
pressed and decompressed like this. The resulting outputs
|
||||
are fed sequentially to stdout. Compression of multiple
|
||||
files in this manner generates a stream containing multi-
|
||||
ple compressed file representations. Such a stream can be
|
||||
decompressed correctly only by bzip2 version 0.9.0 or
|
||||
later. Earlier versions of bzip2 will stop after decom-
|
||||
pressing the first file in the stream.
|
||||
|
||||
bzcat (or bzip2 -dc) decompresses all specified files to
|
||||
the standard output.
|
||||
|
||||
bzip2 will read arguments from the environment variables
|
||||
BZIP2 and BZIP, in that order, and will process them
|
||||
before any arguments read from the command line. This
|
||||
gives a convenient way to supply default arguments.
|
||||
|
||||
Compression is always performed, even if the compressed
|
||||
file is slightly larger than the original. Files of less
|
||||
than about one hundred bytes tend to get larger, since the
|
||||
compression mechanism has a constant overhead in the
|
||||
region of 50 bytes. Random data (including the output of
|
||||
most file compressors) is coded at about 8.05 bits per
|
||||
byte, giving an expansion of around 0.5%.
|
||||
|
||||
As a self-check for your protection, bzip2 uses 32-bit
|
||||
CRCs to make sure that the decompressed version of a file
|
||||
is identical to the original. This guards against corrup-
|
||||
tion of the compressed data, and against undetected bugs
|
||||
in bzip2 (hopefully very unlikely). The chance of data
|
||||
corruption going undetected is microscopic, about one
|
||||
chance in four billion for each file processed. Be aware,
|
||||
though, that the check occurs upon decompression, so it
|
||||
can only tell you that something is wrong. It can't help
|
||||
you recover the original uncompressed data. You can use
|
||||
bzip2recover to try to recover data from damaged files.
|
||||
|
||||
Return values: 0 for a normal exit, 1 for environmental
|
||||
problems (file not found, invalid flags, I/O errors, &c),
|
||||
2 to indicate a corrupt compressed file, 3 for an internal
|
||||
consistency error (eg, bug) which caused bzip2 to panic.
|
||||
|
||||
|
||||
OPTIONS
|
||||
-c --stdout
|
||||
Compress or decompress to standard output.
|
||||
|
||||
-d --decompress
|
||||
Force decompression. bzip2, bunzip2 and bzcat are
|
||||
really the same program, and the decision about
|
||||
what actions to take is done on the basis of which
|
||||
name is used. This flag overrides that mechanism,
|
||||
and forces bzip2 to decompress.
|
||||
|
||||
-z --compress
|
||||
The complement to -d: forces compression,
|
||||
regardless of the invocation name.
|
||||
|
||||
-t --test
|
||||
Check integrity of the specified file(s), but don't
|
||||
decompress them. This really performs a trial
|
||||
decompression and throws away the result.
|
||||
|
||||
-f --force
|
||||
Force overwrite of output files. Normally, bzip2
|
||||
will not overwrite existing output files. Also
|
||||
forces bzip2 to break hard links to files, which it
|
||||
otherwise wouldn't do.
|
||||
|
||||
bzip2 normally declines to decompress files which
|
||||
don't have the correct magic header bytes. If
|
||||
forced (-f), however, it will pass such files
|
||||
through unmodified. This is how GNU gzip behaves.
|
||||
|
||||
-k --keep
|
||||
Keep (don't delete) input files during compression
|
||||
or decompression.
|
||||
|
||||
-s --small
|
||||
Reduce memory usage, for compression, decompression
|
||||
and testing. Files are decompressed and tested
|
||||
using a modified algorithm which only requires 2.5
|
||||
bytes per block byte. This means any file can be
|
||||
decompressed in 2300k of memory, albeit at about
|
||||
half the normal speed.
|
||||
|
||||
During compression, -s selects a block size of
|
||||
200k, which limits memory use to around the same
|
||||
figure, at the expense of your compression ratio.
|
||||
In short, if your machine is low on memory (8
|
||||
megabytes or less), use -s for everything. See
|
||||
MEMORY MANAGEMENT below.
|
||||
|
||||
-q --quiet
|
||||
Suppress non-essential warning messages. Messages
|
||||
pertaining to I/O errors and other critical events
|
||||
will not be suppressed.
|
||||
|
||||
-v --verbose
|
||||
Verbose mode -- show the compression ratio for each
|
||||
file processed. Further -v's increase the ver-
|
||||
bosity level, spewing out lots of information which
|
||||
is primarily of interest for diagnostic purposes.
|
||||
|
||||
-L --license -V --version
|
||||
Display the software version, license terms and
|
||||
conditions.
|
||||
|
||||
-1 (or --fast) to -9 (or --best)
|
||||
Set the block size to 100 k, 200 k .. 900 k when
|
||||
compressing. Has no effect when decompressing.
|
||||
See MEMORY MANAGEMENT below. The --fast and --best
|
||||
aliases are primarily for GNU gzip compatibility.
|
||||
In particular, --fast doesn't make things signifi-
|
||||
cantly faster. And --best merely selects the
|
||||
default behaviour.
|
||||
|
||||
-- Treats all subsequent arguments as file names, even
|
||||
if they start with a dash. This is so you can han-
|
||||
dle files with names beginning with a dash, for
|
||||
example: bzip2 -- -myfilename.
|
||||
|
||||
--repetitive-fast --repetitive-best
|
||||
These flags are redundant in versions 0.9.5 and
|
||||
above. They provided some coarse control over the
|
||||
behaviour of the sorting algorithm in earlier ver-
|
||||
sions, which was sometimes useful. 0.9.5 and above
|
||||
have an improved algorithm which renders these
|
||||
flags irrelevant.
|
||||
|
||||
|
||||
MEMORY MANAGEMENT
|
||||
bzip2 compresses large files in blocks. The block size
|
||||
affects both the compression ratio achieved, and the
|
||||
amount of memory needed for compression and decompression.
|
||||
The flags -1 through -9 specify the block size to be
|
||||
100,000 bytes through 900,000 bytes (the default) respec-
|
||||
tively. At decompression time, the block size used for
|
||||
compression is read from the header of the compressed
|
||||
file, and bunzip2 then allocates itself just enough memory
|
||||
to decompress the file. Since block sizes are stored in
|
||||
compressed files, it follows that the flags -1 to -9 are
|
||||
irrelevant to and so ignored during decompression.
|
||||
|
||||
Compression and decompression requirements, in bytes, can
|
||||
be estimated as:
|
||||
|
||||
Compression: 400k + ( 8 x block size )
|
||||
|
||||
Decompression: 100k + ( 4 x block size ), or
|
||||
100k + ( 2.5 x block size )
|
||||
|
||||
Larger block sizes give rapidly diminishing marginal
|
||||
returns. Most of the compression comes from the first two
|
||||
or three hundred k of block size, a fact worth bearing in
|
||||
mind when using bzip2 on small machines. It is also
|
||||
important to appreciate that the decompression memory
|
||||
requirement is set at compression time by the choice of
|
||||
block size.
|
||||
|
||||
For files compressed with the default 900k block size,
|
||||
bunzip2 will require about 3700 kbytes to decompress. To
|
||||
support decompression of any file on a 4 megabyte machine,
|
||||
bunzip2 has an option to decompress using approximately
|
||||
half this amount of memory, about 2300 kbytes. Decompres-
|
||||
sion speed is also halved, so you should use this option
|
||||
only where necessary. The relevant flag is -s.
|
||||
|
||||
In general, try and use the largest block size memory con-
|
||||
straints allow, since that maximises the compression
|
||||
achieved. Compression and decompression speed are virtu-
|
||||
ally unaffected by block size.
|
||||
|
||||
Another significant point applies to files which fit in a
|
||||
single block -- that means most files you'd encounter
|
||||
using a large block size. The amount of real memory
|
||||
touched is proportional to the size of the file, since the
|
||||
file is smaller than a block. For example, compressing a
|
||||
file 20,000 bytes long with the flag -9 will cause the
|
||||
compressor to allocate around 7600k of memory, but only
|
||||
touch 400k + 20000 * 8 = 560 kbytes of it. Similarly, the
|
||||
decompressor will allocate 3700k but only touch 100k +
|
||||
20000 * 4 = 180 kbytes.
|
||||
|
||||
Here is a table which summarises the maximum memory usage
|
||||
for different block sizes. Also recorded is the total
|
||||
compressed size for 14 files of the Calgary Text Compres-
|
||||
sion Corpus totalling 3,141,622 bytes. This column gives
|
||||
some feel for how compression varies with block size.
|
||||
These figures tend to understate the advantage of larger
|
||||
block sizes for larger files, since the Corpus is domi-
|
||||
nated by smaller files.
|
||||
|
||||
Compress Decompress Decompress Corpus
|
||||
Flag usage usage -s usage Size
|
||||
|
||||
-1 1200k 500k 350k 914704
|
||||
-2 2000k 900k 600k 877703
|
||||
-3 2800k 1300k 850k 860338
|
||||
-4 3600k 1700k 1100k 846899
|
||||
-5 4400k 2100k 1350k 845160
|
||||
-6 5200k 2500k 1600k 838626
|
||||
-7 6100k 2900k 1850k 834096
|
||||
-8 6800k 3300k 2100k 828642
|
||||
-9 7600k 3700k 2350k 828642
|
||||
|
||||
|
||||
RECOVERING DATA FROM DAMAGED FILES
|
||||
bzip2 compresses files in blocks, usually 900kbytes long.
|
||||
Each block is handled independently. If a media or trans-
|
||||
mission error causes a multi-block .bz2 file to become
|
||||
damaged, it may be possible to recover data from the
|
||||
undamaged blocks in the file.
|
||||
|
||||
The compressed representation of each block is delimited
|
||||
by a 48-bit pattern, which makes it possible to find the
|
||||
block boundaries with reasonable certainty. Each block
|
||||
also carries its own 32-bit CRC, so damaged blocks can be
|
||||
distinguished from undamaged ones.
|
||||
|
||||
bzip2recover is a simple program whose purpose is to
|
||||
search for blocks in .bz2 files, and write each block out
|
||||
into its own .bz2 file. You can then use bzip2 -t to test
|
||||
the integrity of the resulting files, and decompress those
|
||||
which are undamaged.
|
||||
|
||||
bzip2recover takes a single argument, the name of the dam-
|
||||
aged file, and writes a number of files
|
||||
"rec00001file.bz2", "rec00002file.bz2", etc, containing
|
||||
the extracted blocks. The output filenames are
|
||||
designed so that the use of wildcards in subsequent pro-
|
||||
cessing -- for example, "bzip2 -dc rec*file.bz2 > recov
|
||||
ered_data" -- processes the files in the correct order.
|
||||
|
||||
bzip2recover should be of most use dealing with large .bz2
|
||||
files, as these will contain many blocks. It is clearly
|
||||
futile to use it on damaged single-block files, since a
|
||||
damaged block cannot be recovered. If you wish to min-
|
||||
imise any potential data loss through media or transmis-
|
||||
sion errors, you might consider compressing with a smaller
|
||||
block size.
|
||||
|
||||
|
||||
PERFORMANCE NOTES
|
||||
The sorting phase of compression gathers together similar
|
||||
strings in the file. Because of this, files containing
|
||||
very long runs of repeated symbols, like "aabaabaabaab
|
||||
..." (repeated several hundred times) may compress more
|
||||
slowly than normal. Versions 0.9.5 and above fare much
|
||||
better than previous versions in this respect. The ratio
|
||||
between worst-case and average-case compression time is in
|
||||
the region of 10:1. For previous versions, this figure
|
||||
was more like 100:1. You can use the -vvvv option to mon-
|
||||
itor progress in great detail, if you want.
|
||||
|
||||
Decompression speed is unaffected by these phenomena.
|
||||
|
||||
bzip2 usually allocates several megabytes of memory to
|
||||
operate in, and then charges all over it in a fairly ran-
|
||||
dom fashion. This means that performance, both for com-
|
||||
pressing and decompressing, is largely determined by the
|
||||
speed at which your machine can service cache misses.
|
||||
Because of this, small changes to the code to reduce the
|
||||
miss rate have been observed to give disproportionately
|
||||
large performance improvements. I imagine bzip2 will per-
|
||||
form best on machines with very large caches.
|
||||
|
||||
|
||||
CAVEATS
|
||||
I/O error messages are not as helpful as they could be.
|
||||
bzip2 tries hard to detect I/O errors and exit cleanly,
|
||||
but the details of what the problem is sometimes seem
|
||||
rather misleading.
|
||||
|
||||
This manual page pertains to version 1.0.2 of bzip2. Com-
|
||||
pressed data created by this version is entirely forwards
|
||||
and backwards compatible with the previous public
|
||||
releases, versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1,
|
||||
but with the following exception: 0.9.0 and above can cor-
|
||||
rectly decompress multiple concatenated compressed files.
|
||||
0.1pl2 cannot do this; it will stop after decompressing
|
||||
just the first file in the stream.
|
||||
|
||||
bzip2recover versions prior to this one, 1.0.2, used
|
||||
32-bit integers to represent bit positions in compressed
|
||||
files, so it could not handle compressed files more than
|
||||
512 megabytes long. Version 1.0.2 and above uses 64-bit
|
||||
ints on some platforms which support them (GNU supported
|
||||
targets, and Windows). To establish whether or not
|
||||
bzip2recover was built with such a limitation, run it
|
||||
without arguments. In any event you can build yourself an
|
||||
unlimited version if you can recompile it with MaybeUInt64
|
||||
set to be an unsigned 64-bit integer.
|
||||
|
||||
|
||||
AUTHOR
|
||||
Julian Seward, jseward@acm.org.
|
||||
|
||||
http://sources.redhat.com/bzip2
|
||||
|
||||
The ideas embodied in bzip2 are due to (at least) the fol-
|
||||
lowing people: Michael Burrows and David Wheeler (for the
|
||||
block sorting transformation), David Wheeler (again, for
|
||||
the Huffman coder), Peter Fenwick (for the structured cod-
|
||||
ing model in the original bzip, and many refinements), and
|
||||
Alistair Moffat, Radford Neal and Ian Witten (for the
|
||||
arithmetic coder in the original bzip). I am much
|
||||
indebted for their help, support and advice. See the man-
|
||||
ual in the source distribution for pointers to sources of
|
||||
documentation. Christian von Roques encouraged me to look
|
||||
for faster sorting algorithms, so as to speed up compres-
|
||||
sion. Bela Lubkin encouraged me to improve the worst-case
|
||||
compression performance. The bz* scripts are derived from
|
||||
those of GNU gzip. Many people sent patches, helped with
|
||||
portability problems, lent machines, gave advice and were
|
||||
generally helpful.
|
||||
|
|
@ -0,0 +1 @@
|
|||
.so man1/bunzip2.1
|
131
bzip2recover.c
131
bzip2recover.c
|
@ -4,6 +4,8 @@
|
|||
/*--- bzip2recover.c ---*/
|
||||
/*-----------------------------------------------------------*/
|
||||
|
||||
/*-- Modified for use under GNO by Stephen Heumann --*/
|
||||
|
||||
/*--
|
||||
This program is bzip2recover, a program to attempt data
|
||||
salvage from damaged files created by the accompanying
|
||||
|
@ -56,7 +58,14 @@
|
|||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#ifdef __appleiigs__
|
||||
#include <gsos.h>
|
||||
#if defined(__GNO__) && defined(__STACK_CHECK__)
|
||||
#include <gno/gno.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* This program records bit locations in the file to be recovered.
|
||||
That means that if 64-bit ints are not supported, we will not
|
||||
|
@ -74,14 +83,28 @@
|
|||
#ifdef _MSC_VER
|
||||
typedef unsigned __int64 MaybeUInt64;
|
||||
# define MaybeUInt64_FMT "%I64u"
|
||||
#else
|
||||
#ifdef __ORCAC__
|
||||
typedef unsigned long MaybeUInt64;
|
||||
# define MaybeUInt64_FMT "%lu"
|
||||
#else
|
||||
typedef unsigned int MaybeUInt64;
|
||||
# define MaybeUInt64_FMT "%u"
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
typedef unsigned int UInt32;
|
||||
typedef int Int32;
|
||||
#ifdef __ORCAC__
|
||||
typedef unsigned long UInt32;
|
||||
typedef long Int32;
|
||||
# define Int32_FMT "%ld"
|
||||
# define size_t_FMT "%lu"
|
||||
#else
|
||||
typedef unsigned int UInt32;
|
||||
typedef int Int32;
|
||||
# define Int32_FMT "%d"
|
||||
# define size_t_FMT "%d"
|
||||
#endif /* defined __ORCAC__ */
|
||||
typedef unsigned char UChar;
|
||||
typedef char Char;
|
||||
typedef unsigned char Bool;
|
||||
|
@ -143,7 +166,7 @@ void writeError ( void )
|
|||
void mallocFail ( Int32 n )
|
||||
{
|
||||
fprintf ( stderr,
|
||||
"%s: malloc failed on request for %d bytes.\n",
|
||||
"%s: malloc failed on request for " Int32_FMT " bytes.\n",
|
||||
progName, n );
|
||||
fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
|
||||
progName );
|
||||
|
@ -155,7 +178,7 @@ void mallocFail ( Int32 n )
|
|||
void tooManyBlocks ( Int32 max_handled_blocks )
|
||||
{
|
||||
fprintf ( stderr,
|
||||
"%s: `%s' appears to contain more than %d blocks\n",
|
||||
"%s: `%s' appears to contain more than " Int32_FMT " blocks\n",
|
||||
progName, inFileName, max_handled_blocks );
|
||||
fprintf ( stderr,
|
||||
"%s: and cannot be handled. To fix, increase\n",
|
||||
|
@ -296,8 +319,13 @@ Bool endsInBz2 ( Char* name )
|
|||
if (n <= 4) return False;
|
||||
return
|
||||
(name[n-4] == '.' &&
|
||||
#ifdef __GNO__
|
||||
(name[n-3] == 'b' || name[n-3] == 'B') &&
|
||||
(name[n-2] == 'z' || name[n-2] == 'Z') && /* both cases must test the same character, n-2 */
|
||||
#else
|
||||
name[n-3] == 'b' &&
|
||||
name[n-2] == 'z' &&
|
||||
#endif
|
||||
name[n-1] == '2');
|
||||
}
|
||||
|
||||
|
@ -313,6 +341,10 @@ Bool endsInBz2 ( Char* name )
|
|||
# define BZ_SPLIT_SYM '/' /* path splitter on Unix platform */
|
||||
#endif
|
||||
|
||||
#ifdef __appleiigs__
|
||||
# define BZ_SPLIT_SYM_GS ':' /* possible path splitter on GS/OS */
|
||||
#endif
|
||||
|
||||
#define BLOCK_HEADER_HI 0x00003141UL
|
||||
#define BLOCK_HEADER_LO 0x59265359UL
|
||||
|
||||
|
@ -323,14 +355,28 @@ Bool endsInBz2 ( Char* name )
|
|||
would have an uncompressed size of at least 40GB, so the chances
|
||||
are low you'll need to up this.
|
||||
*/
|
||||
/* STH - Values larger than 5369 (actually a bit less than that)
|
||||
are useless when MaybeUInt64 is 32 bits.
|
||||
*/
|
||||
#ifdef __ORCAC__
|
||||
#define BZ_MAX_HANDLED_BLOCKS 5369
|
||||
#else
|
||||
#define BZ_MAX_HANDLED_BLOCKS 50000
|
||||
#endif
|
||||
|
||||
#ifndef __ORCAC__
|
||||
MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
|
||||
MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS];
|
||||
MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
|
||||
MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS];
|
||||
#else /* if defined __ORCAC__ */
|
||||
MaybeUInt64 *bStart;
|
||||
MaybeUInt64 *bEnd;
|
||||
MaybeUInt64 *rbStart;
|
||||
MaybeUInt64 *rbEnd;
|
||||
#endif
|
||||
|
||||
Int32 main ( Int32 argc, Char** argv )
|
||||
int main ( int argc, Char** argv )
|
||||
{
|
||||
FILE* inFile;
|
||||
FILE* outFile;
|
||||
|
@ -341,11 +387,19 @@ Int32 main ( Int32 argc, Char** argv )
|
|||
UInt32 buffHi, buffLo, blockCRC;
|
||||
Char* p;
|
||||
|
||||
#if defined(__GNO__) && defined(__STACK_CHECK__)
|
||||
__REPORT_STACK();
|
||||
#endif
|
||||
|
||||
strcpy ( progName, argv[0] );
|
||||
inFileName[0] = outFileName[0] = 0;
|
||||
|
||||
fprintf ( stderr,
|
||||
fprintf ( stderr,
|
||||
#ifdef __GNO__
|
||||
"bzip2recover 1.0.2gs1: extracts blocks from damaged .bz2 files.\n" );
|
||||
#else
|
||||
"bzip2recover 1.0.2: extracts blocks from damaged .bz2 files.\n" );
|
||||
#endif
|
||||
|
||||
if (argc != 2) {
|
||||
fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
|
||||
|
@ -358,9 +412,11 @@ Int32 main ( Int32 argc, Char** argv )
|
|||
case 4:
|
||||
fprintf(stderr,
|
||||
"\trestrictions on size of recovered file: 512 MB\n");
|
||||
#ifndef __ORCAC__
|
||||
fprintf(stderr,
|
||||
"\tto circumvent, recompile with MaybeUInt64 as an\n"
|
||||
"\tunsigned 64-bit int.\n");
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr,
|
||||
|
@ -373,7 +429,7 @@ Int32 main ( Int32 argc, Char** argv )
|
|||
|
||||
if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
|
||||
fprintf ( stderr,
|
||||
"%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n",
|
||||
"%s: supplied filename is suspiciously (>= " size_t_FMT " chars) long. Bye!\n",
|
||||
progName, strlen(argv[1]) );
|
||||
exit(1);
|
||||
}
|
||||
|
@ -386,6 +442,21 @@ Int32 main ( Int32 argc, Char** argv )
|
|||
exit(1);
|
||||
}
|
||||
|
||||
/* Allocate big arrays dynamically so we can use small memory model. These aren't
|
||||
explicitly free()'d anywhere, but exist for the duration of the program. */
|
||||
#ifdef __ORCAC__
|
||||
bStart = malloc(BZ_MAX_HANDLED_BLOCKS * sizeof(MaybeUInt64));
|
||||
bEnd = malloc(BZ_MAX_HANDLED_BLOCKS * sizeof(MaybeUInt64));
|
||||
rbStart = malloc(BZ_MAX_HANDLED_BLOCKS * sizeof(MaybeUInt64));
|
||||
rbEnd = malloc(BZ_MAX_HANDLED_BLOCKS * sizeof(MaybeUInt64));
|
||||
|
||||
if ((bStart == NULL) || (bEnd == NULL) ||
|
||||
(rbStart == NULL) || (rbEnd == NULL)) {
|
||||
fprintf ( stderr, "%s: couldn't allocate enough memory\n", progName );
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
|
||||
bsIn = bsOpenReadStream ( inFile );
|
||||
fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
|
||||
|
||||
|
@ -404,7 +475,7 @@ Int32 main ( Int32 argc, Char** argv )
|
|||
(bitsRead - bStart[currBlock]) >= 40) {
|
||||
bEnd[currBlock] = bitsRead-1;
|
||||
if (currBlock > 0)
|
||||
fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
|
||||
fprintf ( stderr, " block " Int32_FMT " runs from " MaybeUInt64_FMT
|
||||
" to " MaybeUInt64_FMT " (incomplete)\n",
|
||||
currBlock, bStart[currBlock], bEnd[currBlock] );
|
||||
} else
|
||||
|
@ -426,7 +497,7 @@ Int32 main ( Int32 argc, Char** argv )
|
|||
}
|
||||
if (currBlock > 0 &&
|
||||
(bEnd[currBlock] - bStart[currBlock]) >= 130) {
|
||||
fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
|
||||
fprintf ( stderr, " block " Int32_FMT " runs from " MaybeUInt64_FMT
|
||||
" to " MaybeUInt64_FMT "\n",
|
||||
rbCtr+1, bStart[currBlock], bEnd[currBlock] );
|
||||
rbStart[rbCtr] = bStart[currBlock];
|
||||
|
@ -496,26 +567,41 @@ Int32 main ( Int32 argc, Char** argv )
|
|||
if (bitsRead == rbStart[wrBlock]) {
|
||||
/* Create the output file name, correctly handling leading paths.
|
||||
(31.10.2001 by Sergey E. Kusikov) */
|
||||
/* Modified by STH to make it work better on GNO. It would still
|
||||
be confused by files with a '/' character in their names. */
|
||||
Char* split;
|
||||
Int32 ofs, k;
|
||||
for (k = 0; k < BZ_MAX_FILENAME; k++)
|
||||
outFileName[k] = 0;
|
||||
strcpy (outFileName, inFileName);
|
||||
#ifdef __appleiigs__
|
||||
split = ((strrchr (outFileName, BZ_SPLIT_SYM_GS) >
|
||||
strrchr (outFileName, BZ_SPLIT_SYM)) ?
|
||||
strrchr (outFileName, BZ_SPLIT_SYM_GS) :
|
||||
strrchr (outFileName, BZ_SPLIT_SYM));
|
||||
#else
|
||||
split = strrchr (outFileName, BZ_SPLIT_SYM);
|
||||
#endif /* defined __appleiigs__ */
|
||||
if (split == NULL) {
|
||||
split = outFileName;
|
||||
} else {
|
||||
++split;
|
||||
}
|
||||
/* Now split points to the start of the basename. */
|
||||
ofs = split - outFileName;
|
||||
ofs = split - outFileName;
|
||||
/* On GS, max. block number is < 6000, and ProDOS filenames are short,
|
||||
so use only four digits in output file name */
|
||||
#ifdef __ORCAC__
|
||||
sprintf (split, "rec%4ld", wrBlock+1);
|
||||
#else
|
||||
sprintf (split, "rec%5d", wrBlock+1);
|
||||
#endif
|
||||
for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
|
||||
strcat (outFileName, inFileName + ofs);
|
||||
|
||||
if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
|
||||
|
||||
fprintf ( stderr, " writing block %d to `%s' ...\n",
|
||||
fprintf ( stderr, " writing block " Int32_FMT " to `%s' ...\n",
|
||||
wrBlock+1, outFileName );
|
||||
|
||||
outFile = fopen ( outFileName, "wb" );
|
||||
|
@ -524,6 +610,27 @@ Int32 main ( Int32 argc, Char** argv )
|
|||
progName, outFileName );
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef __appleiigs__
|
||||
/* Set filetype to BIN if running on the GS */
|
||||
{
|
||||
static GSString255 fileNameStringGS;
|
||||
static FileInfoRecGS infoRec = { 4, /* pCount */
|
||||
&fileNameStringGS, /* Ptr to file name */
|
||||
0x00C3, /* access restrictions (none) */
|
||||
0x06, /* filetype (BIN) */
|
||||
0x0000 /* auxtype ($0000) */
|
||||
};
|
||||
|
||||
if (strlen( outFileName ) <= 255) {
|
||||
strncpy( fileNameStringGS.text, outFileName, 255 );
|
||||
fileNameStringGS.length = strlen( outFileName );
|
||||
SetFileInfo( &infoRec );
|
||||
/* Ignoring any errors produced by this call */
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
bsWr = bsOpenWriteStream ( outFile );
|
||||
bsPutUChar ( bsWr, BZ_HDR_B );
|
||||
bsPutUChar ( bsWr, BZ_HDR_Z );
|
||||
|
@ -535,7 +642,7 @@ Int32 main ( Int32 argc, Char** argv )
|
|||
}
|
||||
}
|
||||
|
||||
fprintf ( stderr, "%s: finished\n", progName );
|
||||
fprintf ( stderr, "%s: finished\n", progName );
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
#include "/lang/orca/libraries/rinclude/Types.Rez"
|
||||
|
||||
resource rVersion (0x1, purgeable3, nocrossbank) {
|
||||
|
||||
{ 1, 0, 2, /* version 1.0.2 */
|
||||
release, /* development|alpha|beta|final|release */
|
||||
0 /* non-final release number */
|
||||
},
|
||||
verUS, /* country code -- only some are avail */
|
||||
"bzip2recover", /* name */
|
||||
/* _Very_ brief description. Check "file info" */
|
||||
/* shown in the Finder to see if it's too long */
|
||||
/* Note that \n is used to separate lines here. */
|
||||
"Bzip2 archive recovery program"
|
||||
};
|
173
bzlib.c
173
bzlib.c
|
@ -4,6 +4,11 @@
|
|||
/*--- bzlib.c ---*/
|
||||
/*-------------------------------------------------------------*/
|
||||
|
||||
/*-- Modified for use under GNO by Stephen Heumann --*/
|
||||
#ifdef __ORCAC__
|
||||
segment "bzip2";
|
||||
#endif
|
||||
|
||||
/*--
|
||||
This file is a part of bzip2 and/or libbzip2, a program and
|
||||
library for lossless, block-sorting data compression.
|
||||
|
@ -85,7 +90,18 @@
|
|||
#ifndef BZ_NO_STDIO
|
||||
void BZ2_bz__AssertH__fail ( int errcode )
|
||||
{
|
||||
fprintf(stderr,
|
||||
fprintf(stderr,
|
||||
#ifdef __GNO__
|
||||
"\n\nbunzip2/libbzip2: internal error number %d.\n"
|
||||
"This is a bug in bunzip2/libbzip2, %s.\n"
|
||||
"If you are experiencing it only in the GNO version of bunzip2,\n"
|
||||
"please report it to me at sheumann@myrealbox.com . If you can\n"
|
||||
"duplicate it in other versions of bzip2 as well, please report\n"
|
||||
"it to the original author Julian Seward at jseward@acm.org .\n" /* no comma here: the format string continues on the following lines */
|
||||
"Please make an effort to report this bug; timely and accurate\n"
|
||||
"bug reports eventually lead to higher quality software. Thanks.\n"
|
||||
"Stephen Heumann and Julian Seward.\n\n",
|
||||
#else
|
||||
"\n\nbzip2/libbzip2: internal error number %d.\n"
|
||||
"This is a bug in bzip2/libbzip2, %s.\n"
|
||||
"Please report it to me at: jseward@acm.org. If this happened\n"
|
||||
|
@ -94,10 +110,15 @@ void BZ2_bz__AssertH__fail ( int errcode )
|
|||
"of that program. Please make an effort to report this bug;\n"
|
||||
"timely and accurate bug reports eventually lead to higher\n"
|
||||
"quality software. Thanks. Julian Seward, 30 December 2001.\n\n",
|
||||
#endif
|
||||
errcode,
|
||||
BZ2_bzlibVersion()
|
||||
);
|
||||
|
||||
#ifndef __ORCAC__
|
||||
/* Don't need this for decompression, since error 1007 is only
|
||||
* produced in the blocksort routines used for compression.
|
||||
*/
|
||||
if (errcode == 1007) {
|
||||
fprintf(stderr,
|
||||
"\n*** A special note about internal error number 1007 ***\n"
|
||||
|
@ -125,6 +146,7 @@ void BZ2_bz__AssertH__fail ( int errcode )
|
|||
"\n"
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
exit(3);
|
||||
}
|
||||
|
@ -135,9 +157,11 @@ void BZ2_bz__AssertH__fail ( int errcode )
|
|||
static
|
||||
int bz_config_ok ( void )
|
||||
{
|
||||
#ifndef __ORCAC__
|
||||
if (sizeof(int) != 4) return 0;
|
||||
if (sizeof(short) != 2) return 0;
|
||||
if (sizeof(char) != 1) return 0;
|
||||
#endif
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -158,6 +182,7 @@ void default_bzfree ( void* opaque, void* addr )
|
|||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
#ifndef __ORCAC__
|
||||
static
|
||||
void prepare_new_block ( EState* s )
|
||||
{
|
||||
|
@ -169,17 +194,21 @@ void prepare_new_block ( EState* s )
|
|||
for (i = 0; i < 256; i++) s->inUse[i] = False;
|
||||
s->blockNo++;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
#ifndef __ORCAC__
|
||||
static
|
||||
void init_RL ( EState* s )
|
||||
{
|
||||
s->state_in_ch = 256;
|
||||
s->state_in_len = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef __ORCAC__
|
||||
static
|
||||
Bool isempty_RL ( EState* s )
|
||||
{
|
||||
|
@ -187,9 +216,11 @@ Bool isempty_RL ( EState* s )
|
|||
return False; else
|
||||
return True;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
#ifndef __ORCAC__
|
||||
int BZ_API(BZ2_bzCompressInit)
|
||||
( bz_stream* strm,
|
||||
int blockSize100k,
|
||||
|
@ -254,9 +285,11 @@ int BZ_API(BZ2_bzCompressInit)
|
|||
prepare_new_block ( s );
|
||||
return BZ_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
#ifndef __ORCAC__
|
||||
static
|
||||
void add_pair_to_block ( EState* s )
|
||||
{
|
||||
|
@ -267,6 +300,29 @@ void add_pair_to_block ( EState* s )
|
|||
}
|
||||
s->inUse[s->state_in_ch] = True;
|
||||
switch (s->state_in_len) {
|
||||
#ifdef __ORCAC__
|
||||
case 1:
|
||||
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
|
||||
break;
|
||||
case 2:
|
||||
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
|
||||
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
|
||||
break;
|
||||
case 3:
|
||||
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
|
||||
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
|
||||
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
|
||||
break;
|
||||
default:
|
||||
*((s->inUse)+(s->state_in_len-4)) = True;
|
||||
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
|
||||
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
|
||||
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
|
||||
*((s->block)+(s->nblock)) = (UChar)ch; s->nblock++;
|
||||
*((s->block)+(s->nblock)) = ((UChar)(s->state_in_len-4));
|
||||
s->nblock++;
|
||||
break;
|
||||
#else
|
||||
case 1:
|
||||
s->block[s->nblock] = (UChar)ch; s->nblock++;
|
||||
break;
|
||||
|
@ -288,20 +344,51 @@ void add_pair_to_block ( EState* s )
|
|||
s->block[s->nblock] = ((UChar)(s->state_in_len-4));
|
||||
s->nblock++;
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
#ifndef __ORCAC__
|
||||
static
|
||||
void flush_RL ( EState* s )
|
||||
{
|
||||
if (s->state_in_ch < 256) add_pair_to_block ( s );
|
||||
init_RL ( s );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
#ifdef __ORCAC__
|
||||
#define ADD_CHAR_TO_BLOCK(zs,zchh0) \
|
||||
{ \
|
||||
UInt32 zchh = (UInt32)(zchh0); \
|
||||
/*-- fast track the common case --*/ \
|
||||
if (zchh != zs->state_in_ch && \
|
||||
zs->state_in_len == 1) { \
|
||||
UChar ch = (UChar)(zs->state_in_ch); \
|
||||
BZ_UPDATE_CRC( zs->blockCRC, ch ); \
|
||||
*((zs->inUse)+(zs->state_in_ch)) = True; \
|
||||
*((zs->block)+(zs->nblock)) = (UChar)ch; \
|
||||
zs->nblock++; \
|
||||
zs->state_in_ch = zchh; \
|
||||
} \
|
||||
else \
|
||||
/*-- general, uncommon cases --*/ \
|
||||
if (zchh != zs->state_in_ch || \
|
||||
zs->state_in_len == 255) { \
|
||||
if (zs->state_in_ch < 256) \
|
||||
add_pair_to_block ( zs ); \
|
||||
zs->state_in_ch = zchh; \
|
||||
zs->state_in_len = 1; \
|
||||
} else { \
|
||||
zs->state_in_len++; \
|
||||
} \
|
||||
}
|
||||
#else
|
||||
#define ADD_CHAR_TO_BLOCK(zs,zchh0) \
|
||||
{ \
|
||||
UInt32 zchh = (UInt32)(zchh0); \
|
||||
|
@ -327,9 +414,11 @@ void flush_RL ( EState* s )
|
|||
zs->state_in_len++; \
|
||||
} \
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
#ifndef __ORCAC__
|
||||
static
|
||||
Bool copy_input_until_stop ( EState* s )
|
||||
{
|
||||
|
@ -372,9 +461,11 @@ Bool copy_input_until_stop ( EState* s )
|
|||
}
|
||||
return progress_in;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
#ifndef __ORCAC__
|
||||
static
|
||||
Bool copy_output_until_stop ( EState* s )
|
||||
{
|
||||
|
@ -399,9 +490,11 @@ Bool copy_output_until_stop ( EState* s )
|
|||
|
||||
return progress_out;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
#ifndef __ORCAC__
|
||||
static
|
||||
Bool handle_compress ( bz_stream* strm )
|
||||
{
|
||||
|
@ -446,9 +539,11 @@ Bool handle_compress ( bz_stream* strm )
|
|||
|
||||
return progress_in || progress_out;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
#ifndef __ORCAC__
|
||||
int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action )
|
||||
{
|
||||
Bool progress;
|
||||
|
@ -507,9 +602,11 @@ int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action )
|
|||
}
|
||||
return BZ_OK; /*--not reached--*/
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
#ifndef __ORCAC__
|
||||
int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm )
|
||||
{
|
||||
EState* s;
|
||||
|
@ -527,6 +624,7 @@ int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm )
|
|||
|
||||
return BZ_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
|
@ -636,12 +734,20 @@ void unRLE_obuf_to_output_FAST ( DState* s )
|
|||
UInt32* c_tt = s->tt;
|
||||
UInt32 c_tPos = s->tPos;
|
||||
char* cs_next_out = s->strm->next_out;
|
||||
#ifdef __ORCAC__
|
||||
unsigned long cs_avail_out = s->strm->avail_out;
|
||||
#else
|
||||
unsigned int cs_avail_out = s->strm->avail_out;
|
||||
#endif
|
||||
/* end restore */
|
||||
|
||||
UInt32 avail_out_INIT = cs_avail_out;
|
||||
Int32 s_save_nblockPP = s->save_nblock+1;
|
||||
#ifdef __ORCAC__
|
||||
unsigned long total_out_lo32_old;
|
||||
#else
|
||||
unsigned int total_out_lo32_old;
|
||||
#endif
|
||||
|
||||
while (True) {
|
||||
|
||||
|
@ -845,7 +951,7 @@ int BZ_API(BZ2_bzDecompress) ( bz_stream *strm )
|
|||
if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) {
|
||||
BZ_FINALISE_CRC ( s->calculatedBlockCRC );
|
||||
if (s->verbosity >= 3)
|
||||
VPrintf2 ( " {0x%x, 0x%x}", s->storedBlockCRC,
|
||||
VPrintf2 ( " {" UInt32_HEXFMT ", " UInt32_HEXFMT "}", s->storedBlockCRC,
|
||||
s->calculatedBlockCRC );
|
||||
if (s->verbosity >= 2) VPrintf0 ( "]" );
|
||||
if (s->calculatedBlockCRC != s->storedBlockCRC)
|
||||
|
@ -863,7 +969,7 @@ int BZ_API(BZ2_bzDecompress) ( bz_stream *strm )
|
|||
Int32 r = BZ2_decompress ( s );
|
||||
if (r == BZ_STREAM_END) {
|
||||
if (s->verbosity >= 3)
|
||||
VPrintf2 ( "\n combined CRCs: stored = 0x%x, computed = 0x%x",
|
||||
VPrintf2 ( "\n combined CRCs: stored = " UInt32_HEXFMT ", computed = " UInt32_HEXFMT,
|
||||
s->storedCombinedCRC, s->calculatedCombinedCRC );
|
||||
if (s->calculatedCombinedCRC != s->storedCombinedCRC)
|
||||
return BZ_DATA_ERROR;
|
||||
|
@ -934,6 +1040,7 @@ static Bool myfeof ( FILE* f )
|
|||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
#ifndef __ORCAC__
|
||||
BZFILE* BZ_API(BZ2_bzWriteOpen)
|
||||
( int* bzerror,
|
||||
FILE* f,
|
||||
|
@ -978,15 +1085,21 @@ BZFILE* BZ_API(BZ2_bzWriteOpen)
|
|||
bzf->initialisedOk = True;
|
||||
return bzf;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
#ifndef __ORCAC__
|
||||
void BZ_API(BZ2_bzWrite)
|
||||
( int* bzerror,
|
||||
BZFILE* b,
|
||||
void* buf,
|
||||
void* buf,
|
||||
#ifdef __ORCAC__
|
||||
long len )
|
||||
#else
|
||||
int len )
|
||||
#endif
|
||||
{
|
||||
Int32 n, n2, ret;
|
||||
bzFile* bzf = (bzFile*)b;
|
||||
|
@ -1024,29 +1137,45 @@ void BZ_API(BZ2_bzWrite)
|
|||
{ BZ_SETERR(BZ_OK); return; };
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
#ifndef __ORCAC__
|
||||
void BZ_API(BZ2_bzWriteClose)
|
||||
( int* bzerror,
|
||||
BZFILE* b,
|
||||
int abandon,
|
||||
#ifdef __ORCAC__
|
||||
unsigned long* nbytes_in,
|
||||
unsigned long* nbytes_out )
|
||||
#else
|
||||
unsigned int* nbytes_in,
|
||||
unsigned int* nbytes_out )
|
||||
#endif
|
||||
{
|
||||
BZ2_bzWriteClose64 ( bzerror, b, abandon,
|
||||
nbytes_in, NULL, nbytes_out, NULL );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef __ORCAC__
|
||||
void BZ_API(BZ2_bzWriteClose64)
|
||||
( int* bzerror,
|
||||
BZFILE* b,
|
||||
int abandon,
|
||||
#ifdef __ORCAC__
|
||||
unsigned long* nbytes_in_lo32,
|
||||
unsigned long* nbytes_in_hi32,
|
||||
unsigned long* nbytes_out_lo32,
|
||||
unsigned long* nbytes_out_hi32 )
|
||||
#else
|
||||
unsigned int* nbytes_in_lo32,
|
||||
unsigned int* nbytes_in_hi32,
|
||||
unsigned int* nbytes_out_lo32,
|
||||
unsigned int* nbytes_out_hi32 )
|
||||
#endif
|
||||
{
|
||||
Int32 n, n2, ret;
|
||||
bzFile* bzf = (bzFile*)b;
|
||||
|
@ -1102,6 +1231,7 @@ void BZ_API(BZ2_bzWriteClose64)
|
|||
BZ2_bzCompressEnd ( &(bzf->strm) );
|
||||
free ( bzf );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
|
@ -1179,11 +1309,19 @@ void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b )
|
|||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
#ifdef __ORCAC__
|
||||
long BZ_API(BZ2_bzRead)
|
||||
#else
|
||||
int BZ_API(BZ2_bzRead)
|
||||
#endif
|
||||
( int* bzerror,
|
||||
BZFILE* b,
|
||||
void* buf,
|
||||
void* buf,
|
||||
#ifdef __ORCAC__
|
||||
long len )
|
||||
#else
|
||||
int len )
|
||||
#endif
|
||||
{
|
||||
Int32 n, ret;
|
||||
bzFile* bzf = (bzFile*)b;
|
||||
|
@ -1265,11 +1403,20 @@ void BZ_API(BZ2_bzReadGetUnused)
|
|||
/*---------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
#ifndef __ORCAC__
|
||||
int BZ_API(BZ2_bzBuffToBuffCompress)
|
||||
( char* dest,
|
||||
( char* dest,
|
||||
#ifdef __ORCAC__
|
||||
unsigned long* destLen,
|
||||
#else
|
||||
unsigned int* destLen,
|
||||
#endif
|
||||
char* source,
|
||||
#ifdef __ORCAC__
|
||||
unsigned long sourceLen,
|
||||
#else
|
||||
unsigned int sourceLen,
|
||||
#endif
|
||||
int blockSize100k,
|
||||
int verbosity,
|
||||
int workFactor )
|
||||
|
@ -1314,14 +1461,23 @@ int BZ_API(BZ2_bzBuffToBuffCompress)
|
|||
BZ2_bzCompressEnd ( &strm );
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
int BZ_API(BZ2_bzBuffToBuffDecompress)
|
||||
( char* dest,
|
||||
#ifdef __ORCAC__
|
||||
unsigned long* destLen,
|
||||
#else
|
||||
unsigned int* destLen,
|
||||
#endif
|
||||
char* source,
|
||||
#ifdef __ORCAC__
|
||||
unsigned long sourceLen,
|
||||
#else
|
||||
unsigned int sourceLen,
|
||||
#endif
|
||||
int small,
|
||||
int verbosity )
|
||||
{
|
||||
|
@ -1390,7 +1546,9 @@ const char * BZ_API(BZ2_bzlibVersion)(void)
|
|||
return BZ_VERSION;
|
||||
}
|
||||
|
||||
|
||||
/* This stuff is disabled because it may be broken under GNO due to
|
||||
16-bit ints. It has not been modified to use longs where needed. */
|
||||
#ifndef __ORCAC__
|
||||
#ifndef BZ_NO_STDIO
|
||||
/*---------------------------------------------------*/
|
||||
|
||||
|
@ -1586,6 +1744,7 @@ const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum)
|
|||
return bzerrorstrings[err*-1];
|
||||
}
|
||||
#endif
|
||||
#endif /* not defined __ORCAC__ */
|
||||
|
||||
|
||||
/*-------------------------------------------------------------*/
|
||||
|
|
87
bzlib.h
87
bzlib.h
|
@ -85,6 +85,27 @@ extern "C" {
|
|||
#define BZ_OUTBUFF_FULL (-8)
|
||||
#define BZ_CONFIG_ERROR (-9)
|
||||
|
||||
#ifdef __ORCAC__
|
||||
typedef
|
||||
struct {
|
||||
char *next_in;
|
||||
unsigned long avail_in;
|
||||
unsigned long total_in_lo32;
|
||||
unsigned long total_in_hi32;
|
||||
|
||||
char *next_out;
|
||||
unsigned long avail_out;
|
||||
unsigned long total_out_lo32;
|
||||
unsigned long total_out_hi32;
|
||||
|
||||
void *state;
|
||||
|
||||
void *(*bzalloc)(void *,long,long);
|
||||
void (*bzfree)(void *,void *);
|
||||
void *opaque;
|
||||
}
|
||||
bz_stream;
|
||||
#else
|
||||
typedef
|
||||
struct {
|
||||
char *next_in;
|
||||
|
@ -104,6 +125,7 @@ typedef
|
|||
void *opaque;
|
||||
}
|
||||
bz_stream;
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef BZ_IMPORT
|
||||
|
@ -195,12 +217,21 @@ BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) (
|
|||
int* nUnused
|
||||
);
|
||||
|
||||
#ifdef __ORCAC__
|
||||
BZ_EXTERN long BZ_API(BZ2_bzRead) (
|
||||
int* bzerror,
|
||||
BZFILE* b,
|
||||
void* buf,
|
||||
long len
|
||||
);
|
||||
#else
|
||||
BZ_EXTERN int BZ_API(BZ2_bzRead) (
|
||||
int* bzerror,
|
||||
BZFILE* b,
|
||||
void* buf,
|
||||
int len
|
||||
);
|
||||
#endif
|
||||
|
||||
BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) (
|
||||
int* bzerror,
|
||||
|
@ -210,13 +241,31 @@ BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) (
|
|||
int workFactor
|
||||
);
|
||||
|
||||
#ifdef __ORCAC__
|
||||
BZ_EXTERN void BZ_API(BZ2_bzWrite) (
|
||||
int* bzerror,
|
||||
BZFILE* b,
|
||||
void* buf,
|
||||
long len
|
||||
);
|
||||
#else
|
||||
BZ_EXTERN void BZ_API(BZ2_bzWrite) (
|
||||
int* bzerror,
|
||||
BZFILE* b,
|
||||
void* buf,
|
||||
int len
|
||||
);
|
||||
#endif
|
||||
|
||||
#ifdef __ORCAC__
|
||||
BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
|
||||
int* bzerror,
|
||||
BZFILE* b,
|
||||
int abandon,
|
||||
unsigned long* nbytes_in,
|
||||
unsigned long* nbytes_out
|
||||
);
|
||||
#else
|
||||
BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
|
||||
int* bzerror,
|
||||
BZFILE* b,
|
||||
|
@ -224,7 +273,19 @@ BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
|
|||
unsigned int* nbytes_in,
|
||||
unsigned int* nbytes_out
|
||||
);
|
||||
#endif
|
||||
|
||||
#ifdef __ORCAC__
|
||||
BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
|
||||
int* bzerror,
|
||||
BZFILE* b,
|
||||
int abandon,
|
||||
unsigned long* nbytes_in_lo32,
|
||||
unsigned long* nbytes_in_hi32,
|
||||
unsigned long* nbytes_out_lo32,
|
||||
unsigned long* nbytes_out_hi32
|
||||
);
|
||||
#else
|
||||
BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
|
||||
int* bzerror,
|
||||
BZFILE* b,
|
||||
|
@ -235,10 +296,31 @@ BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
|
|||
unsigned int* nbytes_out_hi32
|
||||
);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/*-- Utility functions --*/
|
||||
|
||||
#ifdef __ORCAC__
|
||||
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) (
|
||||
char* dest,
|
||||
unsigned long* destLen,
|
||||
char* source,
|
||||
unsigned long sourceLen,
|
||||
int blockSize100k,
|
||||
int verbosity,
|
||||
int workFactor
|
||||
);
|
||||
|
||||
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) (
|
||||
char* dest,
|
||||
unsigned long* destLen,
|
||||
char* source,
|
||||
unsigned long sourceLen,
|
||||
int small,
|
||||
int verbosity
|
||||
);
|
||||
#else
|
||||
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) (
|
||||
char* dest,
|
||||
unsigned int* destLen,
|
||||
|
@ -257,6 +339,7 @@ BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) (
|
|||
int small,
|
||||
int verbosity
|
||||
);
|
||||
#endif
|
||||
|
||||
|
||||
/*--
|
||||
|
@ -273,6 +356,9 @@ BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) (
|
|||
void
|
||||
);
|
||||
|
||||
/* This stuff is disabled because it may be broken under GNO due to
|
||||
16-bit ints. It has not been modified to use longs where needed. */
|
||||
#ifndef __ORCAC__
|
||||
#ifndef BZ_NO_STDIO
|
||||
BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) (
|
||||
const char *path,
|
||||
|
@ -309,6 +395,7 @@ BZ_EXTERN const char * BZ_API(BZ2_bzerror) (
|
|||
int *errnum
|
||||
);
|
||||
#endif
|
||||
#endif /* not defined __ORCAC__ */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
/*--- bzlib_private.h ---*/
|
||||
/*-------------------------------------------------------------*/
|
||||
|
||||
/*-- Modified for use under GNO by Stephen Heumann --*/
|
||||
|
||||
/*--
|
||||
This file is a part of bzip2 and/or libbzip2, a program and
|
||||
library for lossless, block-sorting data compression.
|
||||
|
@ -76,13 +78,30 @@
|
|||
|
||||
/*-- General stuff. --*/
|
||||
|
||||
#ifdef __GNO__
|
||||
#define BZ_VERSION "1.0.2gs1, 07-Jun-2003"
|
||||
#else
|
||||
#define BZ_VERSION "1.0.2, 30-Dec-2001"
|
||||
#endif
|
||||
|
||||
typedef char Char;
|
||||
typedef unsigned char Bool;
|
||||
typedef unsigned char UChar;
|
||||
typedef int Int32;
|
||||
typedef unsigned int UInt32;
|
||||
#ifdef __ORCAC__
|
||||
typedef long Int32;
|
||||
typedef unsigned long UInt32;
|
||||
# define Int32_FMT "%ld"
|
||||
# define UInt32_HEX8FMT "0x%8lx"
|
||||
# define UInt32_HEXFMT "0x%lx"
|
||||
# define Int32_6FMT "%6ld"
|
||||
#else
|
||||
typedef int Int32;
|
||||
typedef unsigned int UInt32;
|
||||
# define Int32_FMT "%d"
|
||||
# define UInt32_HEX8FMT "0x%8x"
|
||||
# define UInt32_HEXFMT "0x%x"
|
||||
# define Int32_6FMT "%6d"
|
||||
#endif /* defined __ORCAC__ */
|
||||
typedef short Int16;
|
||||
typedef unsigned short UInt16;
|
||||
|
||||
|
@ -162,7 +181,11 @@ extern void bz_internal_error ( int errcode );
|
|||
|
||||
/*-- Stuff for randomising repetitive blocks. --*/
|
||||
|
||||
#ifdef __ORCAC__
|
||||
extern Int16 BZ2_rNums[512];
|
||||
#else
|
||||
extern Int32 BZ2_rNums[512];
|
||||
#endif
|
||||
|
||||
#define BZ_RAND_DECLS \
|
||||
Int32 rNToGo; \
|
||||
|
|
61
bzmore
61
bzmore
|
@ -1,61 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Bzmore wrapped for bzip2,
|
||||
# adapted from zmore by Philippe Troin <phil@fifi.org> for Debian GNU/Linux.
|
||||
|
||||
PATH="/usr/bin:$PATH"; export PATH
|
||||
|
||||
prog=`echo $0 | sed 's|.*/||'`
|
||||
case "$prog" in
|
||||
*less) more=less ;;
|
||||
*) more=more ;;
|
||||
esac
|
||||
|
||||
if test "`echo -n a`" = "-n a"; then
|
||||
# looks like a SysV system:
|
||||
n1=''; n2='\c'
|
||||
else
|
||||
n1='-n'; n2=''
|
||||
fi
|
||||
oldtty=`stty -g 2>/dev/null`
|
||||
if stty -cbreak 2>/dev/null; then
|
||||
cb='cbreak'; ncb='-cbreak'
|
||||
else
|
||||
# 'stty min 1' resets eof to ^a on both SunOS and SysV!
|
||||
cb='min 1 -icanon'; ncb='icanon eof ^d'
|
||||
fi
|
||||
if test $? -eq 0 -a -n "$oldtty"; then
|
||||
trap 'stty $oldtty 2>/dev/null; exit' 0 2 3 5 10 13 15
|
||||
else
|
||||
trap 'stty $ncb echo 2>/dev/null; exit' 0 2 3 5 10 13 15
|
||||
fi
|
||||
|
||||
if test $# = 0; then
|
||||
if test -t 0; then
|
||||
echo usage: $prog files...
|
||||
else
|
||||
bzip2 -cdfq | eval $more
|
||||
fi
|
||||
else
|
||||
FIRST=1
|
||||
for FILE
|
||||
do
|
||||
if test $FIRST -eq 0; then
|
||||
echo $n1 "--More--(Next file: $FILE)$n2"
|
||||
stty $cb -echo 2>/dev/null
|
||||
ANS=`dd bs=1 count=1 2>/dev/null`
|
||||
stty $ncb echo 2>/dev/null
|
||||
echo " "
|
||||
if test "$ANS" = 'e' -o "$ANS" = 'q'; then
|
||||
exit
|
||||
fi
|
||||
fi
|
||||
if test "$ANS" != 's'; then
|
||||
echo "------> $FILE <------"
|
||||
bzip2 -cdfq "$FILE" | eval $more
|
||||
fi
|
||||
if test -t 1; then  # only prompt between files when stdout is a terminal
|
||||
FIRST=0
|
||||
fi
|
||||
done
|
||||
fi
|
152
bzmore.1
152
bzmore.1
|
@ -1,152 +0,0 @@
|
|||
.\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org>
|
||||
.\"for Debian GNU/Linux
|
||||
.TH BZMORE 1
|
||||
.SH NAME
|
||||
bzmore, bzless \- file perusal filter for crt viewing of bzip2 compressed text
|
||||
.SH SYNOPSIS
|
||||
.B bzmore
|
||||
[ name ... ]
|
||||
.br
|
||||
.B bzless
|
||||
[ name ... ]
|
||||
.SH NOTE
|
||||
In the following description,
|
||||
.I bzless
|
||||
and
|
||||
.I less
|
||||
can be used interchangeably with
|
||||
.I bzmore
|
||||
and
|
||||
.I more.
|
||||
.SH DESCRIPTION
|
||||
.I Bzmore
|
||||
is a filter which allows examination of compressed or plain text files
|
||||
one screenful at a time on a soft-copy terminal.
|
||||
.I bzmore
|
||||
works on files compressed with
|
||||
.I bzip2
|
||||
and also on uncompressed files.
|
||||
If a file does not exist,
|
||||
.I bzmore
|
||||
looks for a file of the same name with the addition of a .bz2 suffix.
|
||||
.PP
|
||||
.I Bzmore
|
||||
normally pauses after each screenful, printing --More--
|
||||
at the bottom of the screen.
|
||||
If the user then types a carriage return, one more line is displayed.
|
||||
If the user hits a space,
|
||||
another screenful is displayed. Other possibilities are enumerated later.
|
||||
.PP
|
||||
.I Bzmore
|
||||
looks in the file
|
||||
.I /etc/termcap
|
||||
to determine terminal characteristics,
|
||||
and to determine the default window size.
|
||||
On a terminal capable of displaying 24 lines,
|
||||
the default window size is 22 lines.
|
||||
Other sequences which may be typed when
|
||||
.I bzmore
|
||||
pauses, and their effects, are as follows (\fIi\fP is an optional integer
|
||||
argument, defaulting to 1) :
|
||||
.PP
|
||||
.IP \fIi\|\fP<space>
|
||||
display
|
||||
.I i
|
||||
more lines, (or another screenful if no argument is given)
|
||||
.PP
|
||||
.IP ^D
|
||||
display 11 more lines (a ``scroll'').
|
||||
If
|
||||
.I i
|
||||
is given, then the scroll size is set to \fIi\|\fP.
|
||||
.PP
|
||||
.IP d
|
||||
same as ^D (control-D)
|
||||
.PP
|
||||
.IP \fIi\|\fPz
|
||||
same as typing a space except that \fIi\|\fP, if present, becomes the new
|
||||
window size. Note that the window size reverts back to the default at the
|
||||
end of the current file.
|
||||
.PP
|
||||
.IP \fIi\|\fPs
|
||||
skip \fIi\|\fP lines and print a screenful of lines
|
||||
.PP
|
||||
.IP \fIi\|\fPf
|
||||
skip \fIi\fP screenfuls and print a screenful of lines
|
||||
.PP
|
||||
.IP "q or Q"
|
||||
quit reading the current file; go on to the next (if any)
|
||||
.PP
|
||||
.IP "e or q"
|
||||
When the prompt --More--(Next file:
|
||||
.IR file )
|
||||
is printed, this command causes bzmore to exit.
|
||||
.PP
|
||||
.IP s
|
||||
When the prompt --More--(Next file:
|
||||
.IR file )
|
||||
is printed, this command causes bzmore to skip the next file and continue.
|
||||
.PP
|
||||
.IP =
|
||||
Display the current line number.
|
||||
.PP
|
||||
.IP \fIi\|\fP/expr
|
||||
search for the \fIi\|\fP-th occurrence of the regular expression \fIexpr.\fP
|
||||
If the pattern is not found,
|
||||
.I bzmore
|
||||
goes on to the next file (if any).
|
||||
Otherwise, a screenful is displayed, starting two lines before the place
|
||||
where the expression was found.
|
||||
The user's erase and kill characters may be used to edit the regular
|
||||
expression.
|
||||
Erasing back past the first column cancels the search command.
|
||||
.PP
|
||||
.IP \fIi\|\fPn
|
||||
search for the \fIi\|\fP-th occurrence of the last regular expression entered.
|
||||
.PP
|
||||
.IP !command
|
||||
invoke a shell with \fIcommand\|\fP.
|
||||
Each `!' character in "command" is replaced with the
|
||||
previous shell command. The sequence "\\!" is replaced by "!".
|
||||
.PP
|
||||
.IP ":q or :Q"
|
||||
quit reading the current file; go on to the next (if any)
|
||||
(same as q or Q).
|
||||
.PP
|
||||
.IP .
|
||||
(dot) repeat the previous command.
|
||||
.PP
|
||||
The commands take effect immediately, i.e., it is not necessary to
|
||||
type a carriage return.
|
||||
Up to the time when the command character itself is given,
|
||||
the user may hit the line kill character to cancel the numerical
|
||||
argument being formed.
|
||||
In addition, the user may hit the erase character to redisplay the
|
||||
--More-- message.
|
||||
.PP
|
||||
At any time when output is being sent to the terminal, the user can
|
||||
hit the quit key (normally control\-\\).
|
||||
.I Bzmore
|
||||
will stop sending output, and will display the usual --More--
|
||||
prompt.
|
||||
The user may then enter one of the above commands in the normal manner.
|
||||
Unfortunately, some output is lost when this is done, due to the
|
||||
fact that any characters waiting in the terminal's output queue
|
||||
are flushed when the quit signal occurs.
|
||||
.PP
|
||||
The terminal is set to
|
||||
.I noecho
|
||||
mode by this program so that the output can be continuous.
|
||||
What you type will thus not show on your terminal, except for the / and !
|
||||
commands.
|
||||
.PP
|
||||
If the standard output is not a teletype, then
|
||||
.I bzmore
|
||||
acts just like
|
||||
.IR bzcat ,
|
||||
except that a header is printed before each file.
|
||||
.SH FILES
|
||||
.DT
|
||||
/etc/termcap Terminal data base
|
||||
.SH "SEE ALSO"
|
||||
more(1), less(1), bzip2(1), bzdiff(1), bzgrep(1)
|
714
compress.c
714
compress.c
|
@ -1,714 +0,0 @@
|
|||
|
||||
/*-------------------------------------------------------------*/
|
||||
/*--- Compression machinery (not incl block sorting) ---*/
|
||||
/*--- compress.c ---*/
|
||||
/*-------------------------------------------------------------*/
|
||||
|
||||
/*--
|
||||
This file is a part of bzip2 and/or libbzip2, a program and
|
||||
library for lossless, block-sorting data compression.
|
||||
|
||||
Copyright (C) 1996-2002 Julian R Seward. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. The origin of this software must not be misrepresented; you must
|
||||
not claim that you wrote the original software. If you use this
|
||||
software in a product, an acknowledgment in the product
|
||||
documentation would be appreciated but is not required.
|
||||
|
||||
3. Altered source versions must be plainly marked as such, and must
|
||||
not be misrepresented as being the original software.
|
||||
|
||||
4. The name of the author may not be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
||||
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Julian Seward, Cambridge, UK.
|
||||
jseward@acm.org
|
||||
bzip2/libbzip2 version 1.0 of 21 March 2000
|
||||
|
||||
This program is based on (at least) the work of:
|
||||
Mike Burrows
|
||||
David Wheeler
|
||||
Peter Fenwick
|
||||
Alistair Moffat
|
||||
Radford Neal
|
||||
Ian H. Witten
|
||||
Robert Sedgewick
|
||||
Jon L. Bentley
|
||||
|
||||
For more information on these sources, see the manual.
|
||||
--*/
|
||||
|
||||
/*--
|
||||
CHANGES
|
||||
~~~~~~~
|
||||
0.9.0 -- original version.
|
||||
|
||||
0.9.0a/b -- no changes in this file.
|
||||
|
||||
0.9.0c
|
||||
* changed setting of nGroups in sendMTFValues() so as to
|
||||
do a bit better on small files
|
||||
--*/
|
||||
|
||||
#include "bzlib_private.h"
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
/*--- Bit stream I/O ---*/
|
||||
/*---------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
/*--
   Reset the bit-stream writer state kept in s: the bit
   accumulator (bsBuff) is cleared and the count of bits
   currently held in it (bsLive) is set to zero.
--*/
void BZ2_bsInitWrite ( EState* s )
{
   s->bsBuff = 0;
   s->bsLive = 0;
}
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
static
|
||||
void bsFinishWrite ( EState* s )
|
||||
{
|
||||
while (s->bsLive > 0) {
|
||||
s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24);
|
||||
s->numZ++;
|
||||
s->bsBuff <<= 8;
|
||||
s->bsLive -= 8;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
/*--
   Move every complete byte held in the accumulator out to
   zbits[], so that fewer than 8 bits remain live afterwards.
   Expects a variable `s' (EState*) to be in scope at the point
   of use.  The argument nz is accepted but not referenced.
--*/
#define bsNEEDW(nz)                           \
do {                                          \
   while (s->bsLive >= 8) {                   \
      s->zbits[s->numZ]                       \
         = (UChar)(s->bsBuff >> 24);          \
      s->numZ++;                              \
      s->bsBuff <<= 8;                        \
      s->bsLive -= 8;                         \
   }                                          \
} while (0)
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
static
|
||||
__inline__
|
||||
void bsW ( EState* s, Int32 n, UInt32 v )
|
||||
{
|
||||
bsNEEDW ( n );
|
||||
s->bsBuff |= (v << (32 - s->bsLive - n));
|
||||
s->bsLive += n;
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
static
|
||||
void bsPutUInt32 ( EState* s, UInt32 u )
|
||||
{
|
||||
bsW ( s, 8, (u >> 24) & 0xffL );
|
||||
bsW ( s, 8, (u >> 16) & 0xffL );
|
||||
bsW ( s, 8, (u >> 8) & 0xffL );
|
||||
bsW ( s, 8, u & 0xffL );
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
static
|
||||
void bsPutUChar ( EState* s, UChar c )
|
||||
{
|
||||
bsW( s, 8, (UInt32)c );
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
/*--- The back end proper ---*/
|
||||
/*---------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
static
|
||||
void makeMaps_e ( EState* s )
|
||||
{
|
||||
Int32 i;
|
||||
s->nInUse = 0;
|
||||
for (i = 0; i < 256; i++)
|
||||
if (s->inUse[i]) {
|
||||
s->unseqToSeq[i] = s->nInUse;
|
||||
s->nInUse++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
static
|
||||
void generateMTFValues ( EState* s )
|
||||
{
|
||||
UChar yy[256];
|
||||
Int32 i, j;
|
||||
Int32 zPend;
|
||||
Int32 wr;
|
||||
Int32 EOB;
|
||||
|
||||
/*
|
||||
After sorting (eg, here),
|
||||
s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
|
||||
and
|
||||
((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
|
||||
holds the original block data.
|
||||
|
||||
The first thing to do is generate the MTF values,
|
||||
and put them in
|
||||
((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
|
||||
Because there are strictly fewer or equal MTF values
|
||||
than block values, ptr values in this area are overwritten
|
||||
with MTF values only when they are no longer needed.
|
||||
|
||||
The final compressed bitstream is generated into the
|
||||
area starting at
|
||||
(UChar*) (&((UChar*)s->arr2)[s->nblock])
|
||||
|
||||
These storage aliases are set up in bzCompressInit(),
|
||||
except for the last one, which is arranged in
|
||||
compressBlock().
|
||||
*/
|
||||
UInt32* ptr = s->ptr;
|
||||
UChar* block = s->block;
|
||||
UInt16* mtfv = s->mtfv;
|
||||
|
||||
makeMaps_e ( s );
|
||||
EOB = s->nInUse+1;
|
||||
|
||||
for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
|
||||
|
||||
wr = 0;
|
||||
zPend = 0;
|
||||
for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
|
||||
|
||||
for (i = 0; i < s->nblock; i++) {
|
||||
UChar ll_i;
|
||||
AssertD ( wr <= i, "generateMTFValues(1)" );
|
||||
j = ptr[i]-1; if (j < 0) j += s->nblock;
|
||||
ll_i = s->unseqToSeq[block[j]];
|
||||
AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
|
||||
|
||||
if (yy[0] == ll_i) {
|
||||
zPend++;
|
||||
} else {
|
||||
|
||||
if (zPend > 0) {
|
||||
zPend--;
|
||||
while (True) {
|
||||
if (zPend & 1) {
|
||||
mtfv[wr] = BZ_RUNB; wr++;
|
||||
s->mtfFreq[BZ_RUNB]++;
|
||||
} else {
|
||||
mtfv[wr] = BZ_RUNA; wr++;
|
||||
s->mtfFreq[BZ_RUNA]++;
|
||||
}
|
||||
if (zPend < 2) break;
|
||||
zPend = (zPend - 2) / 2;
|
||||
};
|
||||
zPend = 0;
|
||||
}
|
||||
{
|
||||
register UChar rtmp;
|
||||
register UChar* ryy_j;
|
||||
register UChar rll_i;
|
||||
rtmp = yy[1];
|
||||
yy[1] = yy[0];
|
||||
ryy_j = &(yy[1]);
|
||||
rll_i = ll_i;
|
||||
while ( rll_i != rtmp ) {
|
||||
register UChar rtmp2;
|
||||
ryy_j++;
|
||||
rtmp2 = rtmp;
|
||||
rtmp = *ryy_j;
|
||||
*ryy_j = rtmp2;
|
||||
};
|
||||
yy[0] = rtmp;
|
||||
j = ryy_j - &(yy[0]);
|
||||
mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if (zPend > 0) {
|
||||
zPend--;
|
||||
while (True) {
|
||||
if (zPend & 1) {
|
||||
mtfv[wr] = BZ_RUNB; wr++;
|
||||
s->mtfFreq[BZ_RUNB]++;
|
||||
} else {
|
||||
mtfv[wr] = BZ_RUNA; wr++;
|
||||
s->mtfFreq[BZ_RUNA]++;
|
||||
}
|
||||
if (zPend < 2) break;
|
||||
zPend = (zPend - 2) / 2;
|
||||
};
|
||||
zPend = 0;
|
||||
}
|
||||
|
||||
mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
|
||||
|
||||
s->nMTF = wr;
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
#define BZ_LESSER_ICOST 0
|
||||
#define BZ_GREATER_ICOST 15
|
||||
|
||||
static
|
||||
void sendMTFValues ( EState* s )
|
||||
{
|
||||
Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
|
||||
Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
|
||||
Int32 nGroups, nBytes;
|
||||
|
||||
/*--
|
||||
UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
|
||||
is a global since the decoder also needs it.
|
||||
|
||||
Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
|
||||
Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
|
||||
are also globals only used in this proc.
|
||||
Made global to keep stack frame size small.
|
||||
--*/
|
||||
|
||||
|
||||
UInt16 cost[BZ_N_GROUPS];
|
||||
Int32 fave[BZ_N_GROUPS];
|
||||
|
||||
UInt16* mtfv = s->mtfv;
|
||||
|
||||
if (s->verbosity >= 3)
|
||||
VPrintf3( " %d in block, %d after MTF & 1-2 coding, "
|
||||
"%d+2 syms in use\n",
|
||||
s->nblock, s->nMTF, s->nInUse );
|
||||
|
||||
alphaSize = s->nInUse+2;
|
||||
for (t = 0; t < BZ_N_GROUPS; t++)
|
||||
for (v = 0; v < alphaSize; v++)
|
||||
s->len[t][v] = BZ_GREATER_ICOST;
|
||||
|
||||
/*--- Decide how many coding tables to use ---*/
|
||||
AssertH ( s->nMTF > 0, 3001 );
|
||||
if (s->nMTF < 200) nGroups = 2; else
|
||||
if (s->nMTF < 600) nGroups = 3; else
|
||||
if (s->nMTF < 1200) nGroups = 4; else
|
||||
if (s->nMTF < 2400) nGroups = 5; else
|
||||
nGroups = 6;
|
||||
|
||||
/*--- Generate an initial set of coding tables ---*/
|
||||
{
|
||||
Int32 nPart, remF, tFreq, aFreq;
|
||||
|
||||
nPart = nGroups;
|
||||
remF = s->nMTF;
|
||||
gs = 0;
|
||||
while (nPart > 0) {
|
||||
tFreq = remF / nPart;
|
||||
ge = gs-1;
|
||||
aFreq = 0;
|
||||
while (aFreq < tFreq && ge < alphaSize-1) {
|
||||
ge++;
|
||||
aFreq += s->mtfFreq[ge];
|
||||
}
|
||||
|
||||
if (ge > gs
|
||||
&& nPart != nGroups && nPart != 1
|
||||
&& ((nGroups-nPart) % 2 == 1)) {
|
||||
aFreq -= s->mtfFreq[ge];
|
||||
ge--;
|
||||
}
|
||||
|
||||
if (s->verbosity >= 3)
|
||||
VPrintf5( " initial group %d, [%d .. %d], "
|
||||
"has %d syms (%4.1f%%)\n",
|
||||
nPart, gs, ge, aFreq,
|
||||
(100.0 * (float)aFreq) / (float)(s->nMTF) );
|
||||
|
||||
for (v = 0; v < alphaSize; v++)
|
||||
if (v >= gs && v <= ge)
|
||||
s->len[nPart-1][v] = BZ_LESSER_ICOST; else
|
||||
s->len[nPart-1][v] = BZ_GREATER_ICOST;
|
||||
|
||||
nPart--;
|
||||
gs = ge+1;
|
||||
remF -= aFreq;
|
||||
}
|
||||
}
|
||||
|
||||
/*---
|
||||
Iterate up to BZ_N_ITERS times to improve the tables.
|
||||
---*/
|
||||
for (iter = 0; iter < BZ_N_ITERS; iter++) {
|
||||
|
||||
for (t = 0; t < nGroups; t++) fave[t] = 0;
|
||||
|
||||
for (t = 0; t < nGroups; t++)
|
||||
for (v = 0; v < alphaSize; v++)
|
||||
s->rfreq[t][v] = 0;
|
||||
|
||||
/*---
|
||||
Set up an auxiliary length table which is used to fast-track
|
||||
the common case (nGroups == 6).
|
||||
---*/
|
||||
if (nGroups == 6) {
|
||||
for (v = 0; v < alphaSize; v++) {
|
||||
s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
|
||||
s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
|
||||
s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
|
||||
}
|
||||
}
|
||||
|
||||
nSelectors = 0;
|
||||
totc = 0;
|
||||
gs = 0;
|
||||
while (True) {
|
||||
|
||||
/*--- Set group start & end marks. --*/
|
||||
if (gs >= s->nMTF) break;
|
||||
ge = gs + BZ_G_SIZE - 1;
|
||||
if (ge >= s->nMTF) ge = s->nMTF-1;
|
||||
|
||||
/*--
|
||||
Calculate the cost of this group as coded
|
||||
by each of the coding tables.
|
||||
--*/
|
||||
for (t = 0; t < nGroups; t++) cost[t] = 0;
|
||||
|
||||
if (nGroups == 6 && 50 == ge-gs+1) {
|
||||
/*--- fast track the common case ---*/
|
||||
register UInt32 cost01, cost23, cost45;
|
||||
register UInt16 icv;
|
||||
cost01 = cost23 = cost45 = 0;
|
||||
|
||||
# define BZ_ITER(nn) \
|
||||
icv = mtfv[gs+(nn)]; \
|
||||
cost01 += s->len_pack[icv][0]; \
|
||||
cost23 += s->len_pack[icv][1]; \
|
||||
cost45 += s->len_pack[icv][2]; \
|
||||
|
||||
BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
|
||||
BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
|
||||
BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
|
||||
BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
|
||||
BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
|
||||
BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
|
||||
BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
|
||||
BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
|
||||
BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
|
||||
BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
|
||||
|
||||
# undef BZ_ITER
|
||||
|
||||
cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
|
||||
cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
|
||||
cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
|
||||
|
||||
} else {
|
||||
/*--- slow version which correctly handles all situations ---*/
|
||||
for (i = gs; i <= ge; i++) {
|
||||
UInt16 icv = mtfv[i];
|
||||
for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
|
||||
}
|
||||
}
|
||||
|
||||
/*--
|
||||
Find the coding table which is best for this group,
|
||||
and record its identity in the selector table.
|
||||
--*/
|
||||
bc = 999999999; bt = -1;
|
||||
for (t = 0; t < nGroups; t++)
|
||||
if (cost[t] < bc) { bc = cost[t]; bt = t; };
|
||||
totc += bc;
|
||||
fave[bt]++;
|
||||
s->selector[nSelectors] = bt;
|
||||
nSelectors++;
|
||||
|
||||
/*--
|
||||
Increment the symbol frequencies for the selected table.
|
||||
--*/
|
||||
if (nGroups == 6 && 50 == ge-gs+1) {
|
||||
/*--- fast track the common case ---*/
|
||||
|
||||
# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
|
||||
|
||||
BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
|
||||
BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
|
||||
BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
|
||||
BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
|
||||
BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
|
||||
BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
|
||||
BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
|
||||
BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
|
||||
BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
|
||||
BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
|
||||
|
||||
# undef BZ_ITUR
|
||||
|
||||
} else {
|
||||
/*--- slow version which correctly handles all situations ---*/
|
||||
for (i = gs; i <= ge; i++)
|
||||
s->rfreq[bt][ mtfv[i] ]++;
|
||||
}
|
||||
|
||||
gs = ge+1;
|
||||
}
|
||||
if (s->verbosity >= 3) {
|
||||
VPrintf2 ( " pass %d: size is %d, grp uses are ",
|
||||
iter+1, totc/8 );
|
||||
for (t = 0; t < nGroups; t++)
|
||||
VPrintf1 ( "%d ", fave[t] );
|
||||
VPrintf0 ( "\n" );
|
||||
}
|
||||
|
||||
/*--
|
||||
Recompute the tables based on the accumulated frequencies.
|
||||
--*/
|
||||
for (t = 0; t < nGroups; t++)
|
||||
BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
|
||||
alphaSize, 20 );
|
||||
}
|
||||
|
||||
|
||||
AssertH( nGroups < 8, 3002 );
|
||||
AssertH( nSelectors < 32768 &&
|
||||
nSelectors <= (2 + (900000 / BZ_G_SIZE)),
|
||||
3003 );
|
||||
|
||||
|
||||
/*--- Compute MTF values for the selectors. ---*/
|
||||
{
|
||||
UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
|
||||
for (i = 0; i < nGroups; i++) pos[i] = i;
|
||||
for (i = 0; i < nSelectors; i++) {
|
||||
ll_i = s->selector[i];
|
||||
j = 0;
|
||||
tmp = pos[j];
|
||||
while ( ll_i != tmp ) {
|
||||
j++;
|
||||
tmp2 = tmp;
|
||||
tmp = pos[j];
|
||||
pos[j] = tmp2;
|
||||
};
|
||||
pos[0] = tmp;
|
||||
s->selectorMtf[i] = j;
|
||||
}
|
||||
};
|
||||
|
||||
/*--- Assign actual codes for the tables. --*/
|
||||
for (t = 0; t < nGroups; t++) {
|
||||
minLen = 32;
|
||||
maxLen = 0;
|
||||
for (i = 0; i < alphaSize; i++) {
|
||||
if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
|
||||
if (s->len[t][i] < minLen) minLen = s->len[t][i];
|
||||
}
|
||||
AssertH ( !(maxLen > 20), 3004 );
|
||||
AssertH ( !(minLen < 1), 3005 );
|
||||
BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
|
||||
minLen, maxLen, alphaSize );
|
||||
}
|
||||
|
||||
/*--- Transmit the mapping table. ---*/
|
||||
{
|
||||
Bool inUse16[16];
|
||||
for (i = 0; i < 16; i++) {
|
||||
inUse16[i] = False;
|
||||
for (j = 0; j < 16; j++)
|
||||
if (s->inUse[i * 16 + j]) inUse16[i] = True;
|
||||
}
|
||||
|
||||
nBytes = s->numZ;
|
||||
for (i = 0; i < 16; i++)
|
||||
if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
if (inUse16[i])
|
||||
for (j = 0; j < 16; j++) {
|
||||
if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
|
||||
}
|
||||
|
||||
if (s->verbosity >= 3)
|
||||
VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes );
|
||||
}
|
||||
|
||||
/*--- Now the selectors. ---*/
|
||||
nBytes = s->numZ;
|
||||
bsW ( s, 3, nGroups );
|
||||
bsW ( s, 15, nSelectors );
|
||||
for (i = 0; i < nSelectors; i++) {
|
||||
for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
|
||||
bsW(s,1,0);
|
||||
}
|
||||
if (s->verbosity >= 3)
|
||||
VPrintf1( "selectors %d, ", s->numZ-nBytes );
|
||||
|
||||
/*--- Now the coding tables. ---*/
|
||||
nBytes = s->numZ;
|
||||
|
||||
for (t = 0; t < nGroups; t++) {
|
||||
Int32 curr = s->len[t][0];
|
||||
bsW ( s, 5, curr );
|
||||
for (i = 0; i < alphaSize; i++) {
|
||||
while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
|
||||
while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
|
||||
bsW ( s, 1, 0 );
|
||||
}
|
||||
}
|
||||
|
||||
if (s->verbosity >= 3)
|
||||
VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
|
||||
|
||||
/*--- And finally, the block data proper ---*/
|
||||
nBytes = s->numZ;
|
||||
selCtr = 0;
|
||||
gs = 0;
|
||||
while (True) {
|
||||
if (gs >= s->nMTF) break;
|
||||
ge = gs + BZ_G_SIZE - 1;
|
||||
if (ge >= s->nMTF) ge = s->nMTF-1;
|
||||
AssertH ( s->selector[selCtr] < nGroups, 3006 );
|
||||
|
||||
if (nGroups == 6 && 50 == ge-gs+1) {
|
||||
/*--- fast track the common case ---*/
|
||||
UInt16 mtfv_i;
|
||||
UChar* s_len_sel_selCtr
|
||||
= &(s->len[s->selector[selCtr]][0]);
|
||||
Int32* s_code_sel_selCtr
|
||||
= &(s->code[s->selector[selCtr]][0]);
|
||||
|
||||
# define BZ_ITAH(nn) \
|
||||
mtfv_i = mtfv[gs+(nn)]; \
|
||||
bsW ( s, \
|
||||
s_len_sel_selCtr[mtfv_i], \
|
||||
s_code_sel_selCtr[mtfv_i] )
|
||||
|
||||
BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
|
||||
BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
|
||||
BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
|
||||
BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
|
||||
BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
|
||||
BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
|
||||
BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
|
||||
BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
|
||||
BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
|
||||
BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
|
||||
|
||||
# undef BZ_ITAH
|
||||
|
||||
} else {
|
||||
/*--- slow version which correctly handles all situations ---*/
|
||||
for (i = gs; i <= ge; i++) {
|
||||
bsW ( s,
|
||||
s->len [s->selector[selCtr]] [mtfv[i]],
|
||||
s->code [s->selector[selCtr]] [mtfv[i]] );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
gs = ge+1;
|
||||
selCtr++;
|
||||
}
|
||||
AssertH( selCtr == nSelectors, 3007 );
|
||||
|
||||
if (s->verbosity >= 3)
|
||||
VPrintf1( "codes %d\n", s->numZ-nBytes );
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
/*--
   Compress the block currently held in s and append it to the
   output bit stream.  For a non-empty block the CRCs are
   finalised and the block is sorted; the stream header is
   written for block number 1; the block header, CRC, origPtr
   and coded symbols follow; and when is_last_block is set the
   stream trailer with the combined CRC is written and the bit
   buffer is flushed.
--*/
void BZ2_compressBlock ( EState* s, Bool is_last_block )
{
   /*-- six-byte markers that open a block and close the stream --*/
   static const UChar blockMagic[6]
      = { 0x31, 0x41, 0x59, 0x26, 0x53, 0x59 };
   static const UChar streamEndMagic[6]
      = { 0x17, 0x72, 0x45, 0x38, 0x50, 0x90 };
   Int32 k;

   if (s->nblock > 0) {

      BZ_FINALISE_CRC ( s->blockCRC );
      s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
      s->combinedCRC ^= s->blockCRC;
      if (s->blockNo > 1) s->numZ = 0;

      if (s->verbosity >= 2)
         VPrintf4( " block %d: crc = 0x%8x, "
                   "combined CRC = 0x%8x, size = %d\n",
                   s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );

      BZ2_blockSort ( s );
   }

   /*-- the compressed bytes go just past the block data in arr2 --*/
   s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);

   /*-- If this is the first block, create the stream header. --*/
   if (s->blockNo == 1) {
      BZ2_bsInitWrite ( s );
      bsPutUChar ( s, BZ_HDR_B );
      bsPutUChar ( s, BZ_HDR_Z );
      bsPutUChar ( s, BZ_HDR_h );
      bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
   }

   if (s->nblock > 0) {

      for (k = 0; k < 6; k++) bsPutUChar ( s, blockMagic[k] );

      /*-- Now the block's CRC, so it is in a known place. --*/
      bsPutUInt32 ( s, s->blockCRC );

      /*--
         Now a single bit indicating (non-)randomisation.
         As of version 0.9.5, we use a better sorting algorithm
         which makes randomisation unnecessary.  So always set
         the randomised bit to 'no'.  Of course, the decoder
         still needs to be able to handle randomised blocks
         so as to maintain backwards compatibility with
         older versions of bzip2.
      --*/
      bsW(s,1,0);

      bsW ( s, 24, s->origPtr );
      generateMTFValues ( s );
      sendMTFValues ( s );
   }


   /*-- If this is the last block, add the stream trailer. --*/
   if (is_last_block) {

      for (k = 0; k < 6; k++) bsPutUChar ( s, streamEndMagic[k] );
      bsPutUInt32 ( s, s->combinedCRC );
      if (s->verbosity >= 2)
         VPrintf1( " final combined CRC = 0x%x\n ", s->combinedCRC );
      bsFinishWrite ( s );
   }
}
|
||||
|
||||
|
||||
/*-------------------------------------------------------------*/
|
||||
/*--- end compress.c ---*/
|
||||
/*-------------------------------------------------------------*/
|
70
decompress.c
70
decompress.c
|
@ -4,6 +4,11 @@
|
|||
/*--- decompress.c ---*/
|
||||
/*-------------------------------------------------------------*/
|
||||
|
||||
/*-- Modified for use under GNO by Stephen Heumann --*/
|
||||
#ifdef __ORCAC__
|
||||
segment "decompress", dynamic;
|
||||
#endif
|
||||
|
||||
/*--
|
||||
This file is a part of bzip2 and/or libbzip2, a program and
|
||||
library for lossless, block-sorting data compression.
|
||||
|
@ -80,6 +85,36 @@ void makeMaps_d ( DState* s )
|
|||
#define RETURN(rrr) \
|
||||
{ retVal = rrr; goto save_state_and_return; };
|
||||
|
||||
#ifdef __ORCAC__
|
||||
/*--
   Shift one more input byte into the low end of s->bsBuff.
   bsLive grows by 8, the stream's next_in / avail_in are
   advanced past the byte, and the 64-bit input total
   (total_in_lo32 / total_in_hi32) is incremented with carry.
   The caller is responsible for checking avail_in first.
--*/
void getBitsOrcaHack(DState *s) {
   UInt32 nextByte = (UInt32)(*((UChar*)(s->strm->next_in)));

   s->bsBuff = (s->bsBuff << 8) | nextByte;
   s->bsLive += 8;

   s->strm->next_in++;
   s->strm->avail_in--;

   s->strm->total_in_lo32++;
   if (s->strm->total_in_lo32 == 0)
      s->strm->total_in_hi32++;
}
|
||||
|
||||
/*--
   GET_BITS(lll,vvv,nnn): read an nnn-bit field into vvv.

   Expands to a `case lll:' label that records lll in s->state,
   followed by a loop.  When at least nnn bits are live in
   s->bsBuff the top nnn of them are extracted into vvv and
   bsLive is reduced.  Otherwise, if no input is available the
   enclosing function leaves through RETURN(BZ_OK); else
   getBitsOrcaHack() pulls in one more byte and the loop retries.

   The mask is built from (UInt32)1 rather than a plain int 1:
   with a 16-bit int a shift by 15 would overflow the signed
   type before the subtraction is performed.
--*/
#define GET_BITS(lll,vvv,nnn)                     \
   case lll: s->state = lll;                      \
   while (True) {                                 \
      if (s->bsLive >= nnn) {                     \
         UInt32 v;                                \
         v = (s->bsBuff >>                        \
             (s->bsLive-nnn))                     \
             & (((UInt32)1 << nnn)-1);            \
         s->bsLive -= nnn;                        \
         vvv = v;                                 \
         break;                                   \
      }                                           \
      if (s->strm->avail_in == 0) RETURN(BZ_OK);  \
      getBitsOrcaHack(s);                         \
   }
|
||||
|
||||
#else
|
||||
#define GET_BITS(lll,vvv,nnn) \
|
||||
case lll: s->state = lll; \
|
||||
while (True) { \
|
||||
|
@ -103,6 +138,7 @@ void makeMaps_d ( DState* s )
|
|||
if (s->strm->total_in_lo32 == 0) \
|
||||
s->strm->total_in_hi32++; \
|
||||
}
|
||||
#endif
|
||||
|
||||
#define GET_UCHAR(lll,uuu) \
|
||||
GET_BITS(lll,uuu,8)
|
||||
|
@ -141,7 +177,6 @@ void makeMaps_d ( DState* s )
|
|||
lval = gPerm[zvec - gBase[zn]]; \
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------*/
|
||||
Int32 BZ2_decompress ( DState* s )
|
||||
{
|
||||
|
@ -276,7 +311,7 @@ Int32 BZ2_decompress ( DState* s )
|
|||
|
||||
s->currBlockNo++;
|
||||
if (s->verbosity >= 2)
|
||||
VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo );
|
||||
VPrintf1 ( "\n [" Int32_FMT ": huff+mtf ", s->currBlockNo );
|
||||
|
||||
s->storedBlockCRC = 0;
|
||||
GET_UCHAR(BZ_X_BCRC_1, uc);
|
||||
|
@ -341,8 +376,13 @@ Int32 BZ2_decompress ( DState* s )
|
|||
|
||||
/*--- Undo the MTF values for the selectors. ---*/
|
||||
{
|
||||
#ifdef __ORCAC__
|
||||
UChar pos[BZ_N_GROUPS] = { 0, 1, 2, 3, 4, 5 };
|
||||
UChar tmp, v;
|
||||
#else
|
||||
UChar pos[BZ_N_GROUPS], tmp, v;
|
||||
for (v = 0; v < nGroups; v++) pos[v] = v;
|
||||
#endif
|
||||
|
||||
for (i = 0; i < nSelectors; i++) {
|
||||
v = s->selectorMtf[i];
|
||||
|
@ -435,14 +475,22 @@ Int32 BZ2_decompress ( DState* s )
|
|||
if (s->smallDecompress)
|
||||
while (es > 0) {
|
||||
if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
|
||||
#ifdef __ORCAC__
|
||||
*((UInt16 *)(s->ll16)+nblock) = (UInt16)uc;
|
||||
#else
|
||||
s->ll16[nblock] = (UInt16)uc;
|
||||
#endif
|
||||
nblock++;
|
||||
es--;
|
||||
}
|
||||
else
|
||||
while (es > 0) {
|
||||
if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
|
||||
#ifdef __ORCAC__
|
||||
*((UInt32 *)(s->tt)+nblock) = (UInt32)uc;
|
||||
#else
|
||||
s->tt[nblock] = (UInt32)uc;
|
||||
#endif
|
||||
nblock++;
|
||||
es--;
|
||||
};
|
||||
|
@ -509,8 +557,13 @@ Int32 BZ2_decompress ( DState* s )
|
|||
|
||||
s->unzftab[s->seqToUnseq[uc]]++;
|
||||
if (s->smallDecompress)
|
||||
#ifdef __ORCAC__
|
||||
*((UInt16 *)(s->ll16)+nblock) = (UInt16)(s->seqToUnseq[uc]); else
|
||||
*((UInt32 *)(s->tt)+nblock) = (UInt32)(s->seqToUnseq[uc]);
|
||||
#else
|
||||
s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
|
||||
s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]);
|
||||
#endif
|
||||
nblock++;
|
||||
|
||||
GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
|
||||
|
@ -542,7 +595,11 @@ Int32 BZ2_decompress ( DState* s )
|
|||
|
||||
/*-- compute the T vector --*/
|
||||
for (i = 0; i < nblock; i++) {
|
||||
#ifdef __ORCAC__
|
||||
uc = (UChar) *((UInt16 *)(s->ll16)+i);
|
||||
#else
|
||||
uc = (UChar)(s->ll16[i]);
|
||||
#endif
|
||||
SET_LL(i, s->cftabCopy[uc]);
|
||||
s->cftabCopy[uc]++;
|
||||
}
|
||||
|
@ -572,12 +629,21 @@ Int32 BZ2_decompress ( DState* s )
|
|||
|
||||
/*-- compute the T^(-1) vector --*/
|
||||
for (i = 0; i < nblock; i++) {
|
||||
#ifdef __ORCAC__
|
||||
uc = (UChar)((*((UInt32 *)(s->tt)+i)) & 0xff);
|
||||
*((UInt32 *)(s->tt)+(s->cftab[uc])) |= (i << 8);
|
||||
#else
|
||||
uc = (UChar)(s->tt[i] & 0xff);
|
||||
s->tt[s->cftab[uc]] |= (i << 8);
|
||||
#endif
|
||||
s->cftab[uc]++;
|
||||
}
|
||||
|
||||
#ifdef __ORCAC__
|
||||
s->tPos = (*((UInt32 *)(s->tt)+(s->origPtr))) >> 8;
|
||||
#else
|
||||
s->tPos = s->tt[s->origPtr] >> 8;
|
||||
#endif
|
||||
s->nblock_used = 0;
|
||||
if (s->blockRandomised) {
|
||||
BZ_RAND_INIT_MASK;
|
||||
|
|
176
dlltest.c
176
dlltest.c
|
@ -1,176 +0,0 @@
|
|||
/*
|
||||
minibz2
|
||||
libbz2.dll test program.
|
||||
by Yoshioka Tsuneo(QWF00133@nifty.ne.jp/tsuneo-y@is.aist-nara.ac.jp)
|
||||
This file is Public Domain.
|
||||
welcome any email to me.
|
||||
|
||||
usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]
|
||||
*/
|
||||
|
||||
#define BZ_IMPORT
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "bzlib.h"
|
||||
#ifdef _WIN32
|
||||
#include <io.h>
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#define BZ2_LIBNAME "libbz2-1.0.2.DLL"
|
||||
|
||||
#include <windows.h>
|
||||
static int BZ2DLLLoaded = 0;
|
||||
static HINSTANCE BZ2DLLhLib;
|
||||
int BZ2DLLLoadLibrary(void)
|
||||
{
|
||||
HINSTANCE hLib;
|
||||
|
||||
if(BZ2DLLLoaded==1){return 0;}
|
||||
hLib=LoadLibrary(BZ2_LIBNAME);
|
||||
if(hLib == NULL){
|
||||
fprintf(stderr,"Can't load %s\n",BZ2_LIBNAME);
|
||||
return -1;
|
||||
}
|
||||
BZ2_bzlibVersion=GetProcAddress(hLib,"BZ2_bzlibVersion");
|
||||
BZ2_bzopen=GetProcAddress(hLib,"BZ2_bzopen");
|
||||
BZ2_bzdopen=GetProcAddress(hLib,"BZ2_bzdopen");
|
||||
BZ2_bzread=GetProcAddress(hLib,"BZ2_bzread");
|
||||
BZ2_bzwrite=GetProcAddress(hLib,"BZ2_bzwrite");
|
||||
BZ2_bzflush=GetProcAddress(hLib,"BZ2_bzflush");
|
||||
BZ2_bzclose=GetProcAddress(hLib,"BZ2_bzclose");
|
||||
BZ2_bzerror=GetProcAddress(hLib,"BZ2_bzerror");
|
||||
|
||||
if (!BZ2_bzlibVersion || !BZ2_bzopen || !BZ2_bzdopen
|
||||
|| !BZ2_bzread || !BZ2_bzwrite || !BZ2_bzflush
|
||||
|| !BZ2_bzclose || !BZ2_bzerror) {
|
||||
fprintf(stderr,"GetProcAddress failed.\n");
|
||||
return -1;
|
||||
}
|
||||
BZ2DLLLoaded=1;
|
||||
BZ2DLLhLib=hLib;
|
||||
return 0;
|
||||
|
||||
}
|
||||
int BZ2DLLFreeLibrary(void)
|
||||
{
|
||||
if(BZ2DLLLoaded==0){return 0;}
|
||||
FreeLibrary(BZ2DLLhLib);
|
||||
BZ2DLLLoaded=0;
|
||||
}
|
||||
#endif /* WIN32 */
|
||||
|
||||
/* Print the one-line command synopsis to standard output. */
void usage(void)
{
	static const char synopsis[] =
		"usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]";

	puts(synopsis);
}
|
||||
|
||||
int main(int argc,char *argv[])
|
||||
{
|
||||
int decompress = 0;
|
||||
int level = 9;
|
||||
char *fn_r = NULL;
|
||||
char *fn_w = NULL;
|
||||
|
||||
#ifdef _WIN32
|
||||
if(BZ2DLLLoadLibrary()<0){
|
||||
fprintf(stderr,"Loading of %s failed. Giving up.\n", BZ2_LIBNAME);
|
||||
exit(1);
|
||||
}
|
||||
printf("Loading of %s succeeded. Library version is %s.\n",
|
||||
BZ2_LIBNAME, BZ2_bzlibVersion() );
|
||||
#endif
|
||||
while(++argv,--argc){
|
||||
if(**argv =='-' || **argv=='/'){
|
||||
char *p;
|
||||
|
||||
for(p=*argv+1;*p;p++){
|
||||
if(*p=='d'){
|
||||
decompress = 1;
|
||||
}else if('1'<=*p && *p<='9'){
|
||||
level = *p - '0';
|
||||
}else{
|
||||
usage();
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}else{
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(argc>=1){
|
||||
fn_r = *argv;
|
||||
argc--;argv++;
|
||||
}else{
|
||||
fn_r = NULL;
|
||||
}
|
||||
if(argc>=1){
|
||||
fn_w = *argv;
|
||||
argc--;argv++;
|
||||
}else{
|
||||
fn_w = NULL;
|
||||
}
|
||||
{
|
||||
int len;
|
||||
char buff[0x1000];
|
||||
char mode[10];
|
||||
|
||||
if(decompress){
|
||||
BZFILE *BZ2fp_r = NULL;
|
||||
FILE *fp_w = NULL;
|
||||
|
||||
if(fn_w){
|
||||
if((fp_w = fopen(fn_w,"wb"))==NULL){
|
||||
printf("can't open [%s]\n",fn_w);
|
||||
perror("reason:");
|
||||
exit(1);
|
||||
}
|
||||
}else{
|
||||
fp_w = stdout;
|
||||
}
|
||||
if((fn_r == NULL && (BZ2fp_r = BZ2_bzdopen(fileno(stdin),"rb"))==NULL)
|
||||
|| (fn_r != NULL && (BZ2fp_r = BZ2_bzopen(fn_r,"rb"))==NULL)){
|
||||
printf("can't bz2openstream\n");
|
||||
exit(1);
|
||||
}
|
||||
while((len=BZ2_bzread(BZ2fp_r,buff,0x1000))>0){
|
||||
fwrite(buff,1,len,fp_w);
|
||||
}
|
||||
BZ2_bzclose(BZ2fp_r);
|
||||
if(fp_w != stdout) fclose(fp_w);
|
||||
}else{
|
||||
BZFILE *BZ2fp_w = NULL;
|
||||
FILE *fp_r = NULL;
|
||||
|
||||
if(fn_r){
|
||||
if((fp_r = fopen(fn_r,"rb"))==NULL){
|
||||
printf("can't open [%s]\n",fn_r);
|
||||
perror("reason:");
|
||||
exit(1);
|
||||
}
|
||||
}else{
|
||||
fp_r = stdin;
|
||||
}
|
||||
mode[0]='w';
|
||||
mode[1] = '0' + level;
|
||||
mode[2] = '\0';
|
||||
|
||||
if((fn_w == NULL && (BZ2fp_w = BZ2_bzdopen(fileno(stdout),mode))==NULL)
|
||||
|| (fn_w !=NULL && (BZ2fp_w = BZ2_bzopen(fn_w,mode))==NULL)){
|
||||
printf("can't bz2openstream\n");
|
||||
exit(1);
|
||||
}
|
||||
while((len=fread(buff,1,0x1000,fp_r))>0){
|
||||
BZ2_bzwrite(BZ2fp_w,buff,len);
|
||||
}
|
||||
BZ2_bzclose(BZ2fp_w);
|
||||
if(fp_r!=stdin)fclose(fp_r);
|
||||
}
|
||||
}
|
||||
#ifdef _WIN32
|
||||
BZ2DLLFreeLibrary();
|
||||
#endif
|
||||
return 0;
|
||||
}
|
93
dlltest.dsp
93
dlltest.dsp
|
@ -1,93 +0,0 @@
|
|||
# Microsoft Developer Studio Project File - Name="dlltest" - Package Owner=<4>
|
||||
# Microsoft Developer Studio Generated Build File, Format Version 5.00
|
||||
# ** 編集しないでください **
|
||||
|
||||
# TARGTYPE "Win32 (x86) Console Application" 0x0103
|
||||
|
||||
CFG=dlltest - Win32 Debug
|
||||
!MESSAGE これは有効なメイクファイルではありません。 このプロジェクトをビルドするためには NMAKE を使用してください。
|
||||
!MESSAGE [メイクファイルのエクスポート] コマンドを使用して実行してください
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "dlltest.mak".
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE の実行時に構成を指定できます
|
||||
!MESSAGE コマンド ライン上でマクロの設定を定義します。例:
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "dlltest.mak" CFG="dlltest - Win32 Debug"
|
||||
!MESSAGE
|
||||
!MESSAGE 選択可能なビルド モード:
|
||||
!MESSAGE
|
||||
!MESSAGE "dlltest - Win32 Release" ("Win32 (x86) Console Application" 用)
|
||||
!MESSAGE "dlltest - Win32 Debug" ("Win32 (x86) Console Application" 用)
|
||||
!MESSAGE
|
||||
|
||||
# Begin Project
|
||||
# PROP Scc_ProjName ""
|
||||
# PROP Scc_LocalPath ""
|
||||
CPP=cl.exe
|
||||
RSC=rc.exe
|
||||
|
||||
!IF "$(CFG)" == "dlltest - Win32 Release"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 0
|
||||
# PROP BASE Output_Dir "Release"
|
||||
# PROP BASE Intermediate_Dir "Release"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 0
|
||||
# PROP Output_Dir "Release"
|
||||
# PROP Intermediate_Dir "Release"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
|
||||
# ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
|
||||
# ADD BASE RSC /l 0x411 /d "NDEBUG"
|
||||
# ADD RSC /l 0x411 /d "NDEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
|
||||
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 /out:"minibz2.exe"
|
||||
|
||||
!ELSEIF "$(CFG)" == "dlltest - Win32 Debug"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 1
|
||||
# PROP BASE Output_Dir "dlltest_"
|
||||
# PROP BASE Intermediate_Dir "dlltest_"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 1
|
||||
# PROP Output_Dir "dlltest_"
|
||||
# PROP Intermediate_Dir "dlltest_"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
|
||||
# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
|
||||
# ADD BASE RSC /l 0x411 /d "_DEBUG"
|
||||
# ADD RSC /l 0x411 /d "_DEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
|
||||
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /out:"minibz2.exe" /pdbtype:sept
|
||||
|
||||
!ENDIF
|
||||
|
||||
# Begin Target
|
||||
|
||||
# Name "dlltest - Win32 Release"
|
||||
# Name "dlltest - Win32 Debug"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\bzlib.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\dlltest.c
|
||||
# End Source File
|
||||
# End Target
|
||||
# End Project
|
11
huffman.c
11
huffman.c
|
@ -4,6 +4,11 @@
|
|||
/*--- huffman.c ---*/
|
||||
/*-------------------------------------------------------------*/
|
||||
|
||||
/*-- Modified for use under GNO by Stephen Heumann --*/
|
||||
#ifdef __ORCAC__
|
||||
segment "bzip2";
|
||||
#endif
|
||||
|
||||
/*--
|
||||
This file is a part of bzip2 and/or libbzip2, a program and
|
||||
library for lossless, block-sorting data compression.
|
||||
|
@ -112,9 +117,15 @@ void BZ2_hbMakeCodeLengths ( UChar *len,
|
|||
Int32 nNodes, nHeap, n1, n2, i, j, k;
|
||||
Bool tooLong;
|
||||
|
||||
#ifdef __ORCAC__
|
||||
static Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ];
|
||||
static Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
|
||||
static Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
|
||||
#else
|
||||
Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ];
|
||||
Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
|
||||
Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
|
||||
#endif
|
||||
|
||||
for (i = 0; i < alphaSize; i++)
|
||||
weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
|
||||
|
|
27
libbz2.def
27
libbz2.def
|
@ -1,27 +0,0 @@
|
|||
LIBRARY LIBBZ2
|
||||
DESCRIPTION "libbzip2: library for data compression"
|
||||
EXPORTS
|
||||
BZ2_bzCompressInit
|
||||
BZ2_bzCompress
|
||||
BZ2_bzCompressEnd
|
||||
BZ2_bzDecompressInit
|
||||
BZ2_bzDecompress
|
||||
BZ2_bzDecompressEnd
|
||||
BZ2_bzReadOpen
|
||||
BZ2_bzReadClose
|
||||
BZ2_bzReadGetUnused
|
||||
BZ2_bzRead
|
||||
BZ2_bzWriteOpen
|
||||
BZ2_bzWrite
|
||||
BZ2_bzWriteClose
|
||||
BZ2_bzWriteClose64
|
||||
BZ2_bzBuffToBuffCompress
|
||||
BZ2_bzBuffToBuffDecompress
|
||||
BZ2_bzlibVersion
|
||||
BZ2_bzopen
|
||||
BZ2_bzdopen
|
||||
BZ2_bzread
|
||||
BZ2_bzwrite
|
||||
BZ2_bzflush
|
||||
BZ2_bzclose
|
||||
BZ2_bzerror
|
130
libbz2.dsp
130
libbz2.dsp
|
@ -1,130 +0,0 @@
|
|||
# Microsoft Developer Studio Project File - Name="libbz2" - Package Owner=<4>
|
||||
# Microsoft Developer Studio Generated Build File, Format Version 5.00
|
||||
# ** 編集しないでください **
|
||||
|
||||
# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102
|
||||
|
||||
CFG=libbz2 - Win32 Debug
|
||||
!MESSAGE これは有効なメイクファイルではありません。 このプロジェクトをビルドするためには NMAKE を使用してください。
|
||||
!MESSAGE [メイクファイルのエクスポート] コマンドを使用して実行してください
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "libbz2.mak".
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE の実行時に構成を指定できます
|
||||
!MESSAGE コマンド ライン上でマクロの設定を定義します。例:
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "libbz2.mak" CFG="libbz2 - Win32 Debug"
|
||||
!MESSAGE
|
||||
!MESSAGE 選択可能なビルド モード:
|
||||
!MESSAGE
|
||||
!MESSAGE "libbz2 - Win32 Release" ("Win32 (x86) Dynamic-Link Library" 用)
|
||||
!MESSAGE "libbz2 - Win32 Debug" ("Win32 (x86) Dynamic-Link Library" 用)
|
||||
!MESSAGE
|
||||
|
||||
# Begin Project
|
||||
# PROP Scc_ProjName ""
|
||||
# PROP Scc_LocalPath ""
|
||||
CPP=cl.exe
|
||||
MTL=midl.exe
|
||||
RSC=rc.exe
|
||||
|
||||
!IF "$(CFG)" == "libbz2 - Win32 Release"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 0
|
||||
# PROP BASE Output_Dir "Release"
|
||||
# PROP BASE Intermediate_Dir "Release"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 0
|
||||
# PROP Output_Dir "Release"
|
||||
# PROP Intermediate_Dir "Release"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /FD /c
|
||||
# ADD CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /FD /c
|
||||
# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /o NUL /win32
|
||||
# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /o NUL /win32
|
||||
# ADD BASE RSC /l 0x411 /d "NDEBUG"
|
||||
# ADD RSC /l 0x411 /d "NDEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386
|
||||
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386 /out:"libbz2.dll"
|
||||
|
||||
!ELSEIF "$(CFG)" == "libbz2 - Win32 Debug"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 1
|
||||
# PROP BASE Output_Dir "Debug"
|
||||
# PROP BASE Intermediate_Dir "Debug"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 1
|
||||
# PROP Output_Dir "Debug"
|
||||
# PROP Intermediate_Dir "Debug"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /FD /c
|
||||
# ADD CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /FD /c
|
||||
# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /o NUL /win32
|
||||
# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /o NUL /win32
|
||||
# ADD BASE RSC /l 0x411 /d "_DEBUG"
|
||||
# ADD RSC /l 0x411 /d "_DEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 /pdbtype:sept
|
||||
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 /out:"libbz2.dll" /pdbtype:sept
|
||||
|
||||
!ENDIF
|
||||
|
||||
# Begin Target
|
||||
|
||||
# Name "libbz2 - Win32 Release"
|
||||
# Name "libbz2 - Win32 Debug"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\blocksort.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\bzlib.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\bzlib.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\bzlib_private.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\compress.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\crctable.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\decompress.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\huffman.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\libbz2.def
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\randtable.c
|
||||
# End Source File
|
||||
# End Target
|
||||
# End Project
|
63
makefile.msc
63
makefile.msc
|
@ -1,63 +0,0 @@
|
|||
# Makefile for Microsoft Visual C++ 6.0
|
||||
# usage: nmake -f makefile.msc
|
||||
# K.M. Syring (syring@gsf.de)
|
||||
# Fixed up by JRS for bzip2-0.9.5d release.
|
||||
|
||||
CC=cl
|
||||
CFLAGS= -DWIN32 -MD -Ox -D_FILE_OFFSET_BITS=64 -nologo
|
||||
|
||||
OBJS= blocksort.obj \
|
||||
huffman.obj \
|
||||
crctable.obj \
|
||||
randtable.obj \
|
||||
compress.obj \
|
||||
decompress.obj \
|
||||
bzlib.obj
|
||||
|
||||
# Default target (first rule in this makefile): build the library and bzip2, then run the tests.
all: lib bzip2 test
|
||||
|
||||
# Builds bzip2 from bzip2.c, linking libbz2.lib and setargv.obj.
# The same recipe also builds bzip2recover from bzip2recover.c.
bzip2: lib
|
||||
$(CC) $(CFLAGS) -o bzip2 bzip2.c libbz2.lib setargv.obj
|
||||
$(CC) $(CFLAGS) -o bzip2recover bzip2recover.c
|
||||
|
||||
# Archives the objects listed in $(OBJS) into libbz2.lib.
lib: $(OBJS)
|
||||
lib /out:libbz2.lib $(OBJS)
|
||||
|
||||
# Self-test.  Prints words1, compresses sample1-3.ref with -1, -2 and -3 into
# sampleN.rb2, decompresses sample1-3.bz2 into sampleN.tst (sample3 with -ds),
# then uses fc to compare each result against its reference file.
test: bzip2
|
||||
type words1
|
||||
.\\bzip2 -1 < sample1.ref > sample1.rb2
|
||||
.\\bzip2 -2 < sample2.ref > sample2.rb2
|
||||
.\\bzip2 -3 < sample3.ref > sample3.rb2
|
||||
.\\bzip2 -d < sample1.bz2 > sample1.tst
|
||||
.\\bzip2 -d < sample2.bz2 > sample2.tst
|
||||
.\\bzip2 -ds < sample3.bz2 > sample3.tst
|
||||
@echo All six of the fc's should find no differences.
|
||||
@echo If fc finds an error on sample3.bz2, this could be
|
||||
@echo because WinZip's 'TAR file smart CR/LF conversion'
|
||||
@echo is too clever for its own good. Disable this option.
|
||||
@echo The correct size for sample3.ref is 120,244. If it
|
||||
@echo is 150,251, WinZip has messed it up.
|
||||
fc sample1.bz2 sample1.rb2
|
||||
fc sample2.bz2 sample2.rb2
|
||||
fc sample3.bz2 sample3.rb2
|
||||
fc sample1.tst sample1.ref
|
||||
fc sample2.tst sample2.ref
|
||||
fc sample3.tst sample3.ref
|
||||
|
||||
|
||||
|
||||
# Deletes build products (objects, libbz2.lib, both executables) and the
# sampleN.rb2 / sampleN.tst files written by the test target.
clean:
|
||||
del *.obj
|
||||
del libbz2.lib
|
||||
del bzip2.exe
|
||||
del bzip2recover.exe
|
||||
del sample1.rb2
|
||||
del sample2.rb2
|
||||
del sample3.rb2
|
||||
del sample1.tst
|
||||
del sample2.tst
|
||||
del sample3.tst
|
||||
|
||||
# Inference rule: compile each .c file into the matching .obj with $(CC) $(CFLAGS).
.c.obj:
|
||||
$(CC) $(CFLAGS) -c $*.c -o $*.obj
|
||||
|
117
manual.html
117
manual.html
|
@ -1,117 +0,0 @@
|
|||
<HTML>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<!-- Created on January, 5 2002 by texi2html 1.64 -->
|
||||
<!--
|
||||
Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author)
|
||||
Karl Berry <karl@freefriends.org>
|
||||
Olaf Bachmann <obachman@mathematik.uni-kl.de>
|
||||
and many others.
|
||||
Maintained by: Olaf Bachmann <obachman@mathematik.uni-kl.de>
|
||||
Send bugs and suggestions to <texi2html@mathematik.uni-kl.de>
|
||||
|
||||
-->
|
||||
<HEAD>
|
||||
<TITLE>Untitled Document: Untitled Document</TITLE>
|
||||
|
||||
<META NAME="description" CONTENT="Untitled Document: Untitled Document">
|
||||
<META NAME="keywords" CONTENT="Untitled Document: Untitled Document">
|
||||
<META NAME="resource-type" CONTENT="document">
|
||||
<META NAME="distribution" CONTENT="global">
|
||||
<META NAME="Generator" CONTENT="texi2html 1.64">
|
||||
|
||||
</HEAD>
|
||||
|
||||
<BODY LANG="" BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#800080" ALINK="#FF0000">
|
||||
|
||||
<A NAME="SEC_Top"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H1>Untitled Document</H1><P>
|
||||
|
||||
The following text is the License for this software. You should
|
||||
find it identical to that contained in the file LICENSE in the
|
||||
source distribution.
|
||||
</P><P>
|
||||
|
||||
<B>------------------ START OF THE LICENSE ------------------</B>
|
||||
</P><P>
|
||||
|
||||
This program, <CODE>bzip2</CODE>,
|
||||
and associated library <CODE>libbzip2</CODE>, are
|
||||
Copyright (C) 1996-2002 Julian R Seward. All rights reserved.
|
||||
</P><P>
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
<UL>
|
||||
<LI>
|
||||
Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
<LI>
|
||||
The origin of this software must not be misrepresented; you must
|
||||
not claim that you wrote the original software. If you use this
|
||||
software in a product, an acknowledgment in the product
|
||||
documentation would be appreciated but is not required.
|
||||
<LI>
|
||||
Altered source versions must be plainly marked as such, and must
|
||||
not be misrepresented as being the original software.
|
||||
<LI>
|
||||
The name of the author may not be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
</UL>
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS
|
||||
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
<P>
|
||||
|
||||
Julian Seward, Cambridge, UK.
|
||||
</P><P>
|
||||
|
||||
<CODE>jseward@acm.org</CODE>
|
||||
</P><P>
|
||||
|
||||
<CODE>bzip2</CODE>/<CODE>libbzip2</CODE> version 1.0.2 of 30 December 2001.
|
||||
</P><P>
|
||||
|
||||
<B>------------------ END OF THE LICENSE ------------------</B>
|
||||
</P><P>
|
||||
|
||||
Web sites:
|
||||
</P><P>
|
||||
|
||||
<CODE>http://sources.redhat.com/bzip2</CODE>
|
||||
</P><P>
|
||||
|
||||
<CODE>http://www.cacheprof.org</CODE>
|
||||
</P><P>
|
||||
|
||||
PATENTS: To the best of my knowledge, <CODE>bzip2</CODE> does not use any patented
|
||||
algorithms. However, I do not have the resources available to carry out
|
||||
a full patent search. Therefore I cannot give any guarantee of the
|
||||
above statement.
|
||||
</P><P>
|
||||
|
||||
<HR SIZE=1>
|
||||
<BR>
|
||||
<FONT SIZE="-1">
|
||||
This document was generated
|
||||
by <I>Julian Seward</I> on <I>January, 5 2002</I>
|
||||
using <A HREF="http://www.mathematik.uni-kl.de/~obachman/Texi2html
|
||||
"><I>texi2html</I></A>
|
||||
|
||||
</BODY>
|
||||
</HTML>
|
BIN
manual.pdf
BIN
manual.pdf
Binary file not shown.
2243
manual.texi
2243
manual.texi
File diff suppressed because it is too large
Load Diff
|
@ -1,81 +0,0 @@
|
|||
<HTML>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<!-- Created on January, 5 2002 by texi2html 1.64 -->
|
||||
<!--
|
||||
Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author)
|
||||
Karl Berry <karl@freefriends.org>
|
||||
Olaf Bachmann <obachman@mathematik.uni-kl.de>
|
||||
and many others.
|
||||
Maintained by: Olaf Bachmann <obachman@mathematik.uni-kl.de>
|
||||
Send bugs and suggestions to <texi2html@mathematik.uni-kl.de>
|
||||
|
||||
-->
|
||||
<HEAD>
|
||||
<TITLE>Untitled Document: 1. Introduction</TITLE>
|
||||
|
||||
<META NAME="description" CONTENT="Untitled Document: 1. Introduction">
|
||||
<META NAME="keywords" CONTENT="Untitled Document: 1. Introduction">
|
||||
<META NAME="resource-type" CONTENT="document">
|
||||
<META NAME="distribution" CONTENT="global">
|
||||
<META NAME="Generator" CONTENT="texi2html 1.64">
|
||||
|
||||
</HEAD>
|
||||
|
||||
<BODY LANG="" BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#800080" ALINK="#FF0000">
|
||||
|
||||
<A NAME="SEC1"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> &lt; </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC2"> &gt; </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H1> 1. Introduction </H1>
|
||||
<!--docid::SEC1::-->
|
||||
<P>
|
||||
|
||||
<CODE>bzip2</CODE> compresses files using the Burrows-Wheeler
|
||||
block-sorting text compression algorithm, and Huffman coding.
|
||||
Compression is generally considerably better than that
|
||||
achieved by more conventional LZ77/LZ78-based compressors,
|
||||
and approaches the performance of the PPM family of statistical compressors.
|
||||
</P><P>
|
||||
|
||||
<CODE>bzip2</CODE> is built on top of <CODE>libbzip2</CODE>, a flexible library
|
||||
for handling compressed data in the <CODE>bzip2</CODE> format. This manual
|
||||
describes both how to use the program and
|
||||
how to work with the library interface. Most of the
|
||||
manual is devoted to this library, not the program,
|
||||
which is good news if your interest is only in the program.
|
||||
</P><P>
|
||||
|
||||
Chapter 2 describes how to use <CODE>bzip2</CODE>; this is the only part
|
||||
you need to read if you just want to know how to operate the program.
|
||||
Chapter 3 describes the programming interfaces in detail, and
|
||||
Chapter 4 records some miscellaneous notes which I thought
|
||||
ought to be recorded somewhere.
|
||||
</P><P>
|
||||
|
||||
<HR SIZE="6">
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[ &lt;&lt; ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ &gt;&gt; ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<BR>
|
||||
<FONT SIZE="-1">
|
||||
This document was generated
|
||||
by <I>Julian Seward</I> on <I>January, 5 2002</I>
|
||||
using <A HREF="http://www.mathematik.uni-kl.de/~obachman/Texi2html
|
||||
"><I>texi2html</I></A>
|
||||
|
||||
</BODY>
|
||||
</HTML>
|
579
manual_2.html
579
manual_2.html
|
@ -1,579 +0,0 @@
|
|||
<HTML>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<!-- Created on January, 5 2002 by texi2html 1.64 -->
|
||||
<!--
|
||||
Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author)
|
||||
Karl Berry <karl@freefriends.org>
|
||||
Olaf Bachmann <obachman@mathematik.uni-kl.de>
|
||||
and many others.
|
||||
Maintained by: Olaf Bachmann <obachman@mathematik.uni-kl.de>
|
||||
Send bugs and suggestions to <texi2html@mathematik.uni-kl.de>
|
||||
|
||||
-->
|
||||
<HEAD>
|
||||
<TITLE>Untitled Document: 2. How to use <CODE>bzip2</CODE></TITLE>
|
||||
|
||||
<META NAME="description" CONTENT="Untitled Document: 2. How to use <CODE>bzip2</CODE>">
|
||||
<META NAME="keywords" CONTENT="Untitled Document: 2. How to use <CODE>bzip2</CODE>">
|
||||
<META NAME="resource-type" CONTENT="document">
|
||||
<META NAME="distribution" CONTENT="global">
|
||||
<META NAME="Generator" CONTENT="texi2html 1.64">
|
||||
|
||||
</HEAD>
|
||||
|
||||
<BODY LANG="" BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#800080" ALINK="#FF0000">
|
||||
|
||||
<A NAME="SEC2"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_1.html#SEC1"> < </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC3"> > </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H1> 2. How to use <CODE>bzip2</CODE> </H1>
|
||||
<!--docid::SEC2::-->
|
||||
<P>
|
||||
|
||||
This chapter contains a copy of the <CODE>bzip2</CODE> man page,
|
||||
and nothing else.
|
||||
</P><P>
|
||||
|
||||
<BLOCKQUOTE>
|
||||
<P>
|
||||
|
||||
<HR SIZE="6">
|
||||
<A NAME="SEC3"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC2"> < </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC4"> > </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H4> NAME </H4>
|
||||
<!--docid::SEC3::-->
|
||||
<UL>
|
||||
<LI><CODE>bzip2</CODE>, <CODE>bunzip2</CODE>
|
||||
- a block-sorting file compressor, v1.0.2
|
||||
<LI><CODE>bzcat</CODE>
|
||||
- decompresses files to stdout
|
||||
<LI><CODE>bzip2recover</CODE>
|
||||
- recovers data from damaged bzip2 files
|
||||
</UL>
|
||||
<P>
|
||||
|
||||
<HR SIZE="6">
|
||||
<A NAME="SEC4"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC3"> < </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC5"> > </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H4> SYNOPSIS </H4>
|
||||
<!--docid::SEC4::-->
|
||||
<UL>
|
||||
<LI><CODE>bzip2</CODE> [ -cdfkqstvzVL123456789 ] [ filenames ... ]
|
||||
<LI><CODE>bunzip2</CODE> [ -fkvsVL ] [ filenames ... ]
|
||||
<LI><CODE>bzcat</CODE> [ -s ] [ filenames ... ]
|
||||
<LI><CODE>bzip2recover</CODE> filename
|
||||
</UL>
|
||||
<P>
|
||||
|
||||
<HR SIZE="6">
|
||||
<A NAME="SEC5"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC4"> < </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC6"> > </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H4> DESCRIPTION </H4>
|
||||
<!--docid::SEC5::-->
|
||||
<P>
|
||||
|
||||
<CODE>bzip2</CODE> compresses files using the Burrows-Wheeler block sorting
|
||||
text compression algorithm, and Huffman coding. Compression is
|
||||
generally considerably better than that achieved by more conventional
|
||||
LZ77/LZ78-based compressors, and approaches the performance of the PPM
|
||||
family of statistical compressors.
|
||||
</P><P>
|
||||
|
||||
The command-line options are deliberately very similar to those of GNU
|
||||
<CODE>gzip</CODE>, but they are not identical.
|
||||
</P><P>
|
||||
|
||||
<CODE>bzip2</CODE> expects a list of file names to accompany the command-line
|
||||
flags. Each file is replaced by a compressed version of itself, with
|
||||
the name <CODE>original_name.bz2</CODE>. Each compressed file has the same
|
||||
modification date, permissions, and, when possible, ownership as the
|
||||
corresponding original, so that these properties can be correctly
|
||||
restored at decompression time. File name handling is naive in the
|
||||
sense that there is no mechanism for preserving original file names,
|
||||
permissions, ownerships or dates in filesystems which lack these
|
||||
concepts, or have serious file name length restrictions, such as MS-DOS.
|
||||
</P><P>
|
||||
|
||||
<CODE>bzip2</CODE> and <CODE>bunzip2</CODE> will by default not overwrite existing
|
||||
files. If you want this to happen, specify the <CODE>-f</CODE> flag.
|
||||
</P><P>
|
||||
|
||||
If no file names are specified, <CODE>bzip2</CODE> compresses from standard
|
||||
input to standard output. In this case, <CODE>bzip2</CODE> will decline to
|
||||
write compressed output to a terminal, as this would be entirely
|
||||
incomprehensible and therefore pointless.
|
||||
</P><P>
|
||||
|
||||
<CODE>bunzip2</CODE> (or <CODE>bzip2 -d</CODE>) decompresses all
|
||||
specified files. Files which were not created by <CODE>bzip2</CODE>
|
||||
will be detected and ignored, and a warning issued.
|
||||
<CODE>bzip2</CODE> attempts to guess the filename for the decompressed file
|
||||
from that of the compressed file as follows:
|
||||
<UL>
|
||||
<LI><CODE>filename.bz2 </CODE> becomes <CODE>filename</CODE>
|
||||
<LI><CODE>filename.bz </CODE> becomes <CODE>filename</CODE>
|
||||
<LI><CODE>filename.tbz2</CODE> becomes <CODE>filename.tar</CODE>
|
||||
<LI><CODE>filename.tbz </CODE> becomes <CODE>filename.tar</CODE>
|
||||
<LI><CODE>anyothername </CODE> becomes <CODE>anyothername.out</CODE>
|
||||
</UL>
|
||||
If the file does not end in one of the recognised endings,
|
||||
<CODE>.bz2</CODE>, <CODE>.bz</CODE>,
|
||||
<CODE>.tbz2</CODE> or <CODE>.tbz</CODE>, <CODE>bzip2</CODE> complains that it cannot
|
||||
guess the name of the original file, and uses the original name
|
||||
with <CODE>.out</CODE> appended.
|
||||
<P>
|
||||
|
||||
As with compression, supplying no
|
||||
filenames causes decompression from standard input to standard output.
|
||||
</P><P>
|
||||
|
||||
<CODE>bunzip2</CODE> will correctly decompress a file which is the
|
||||
concatenation of two or more compressed files. The result is the
|
||||
concatenation of the corresponding uncompressed files. Integrity
|
||||
testing (<CODE>-t</CODE>) of concatenated compressed files is also supported.
|
||||
</P><P>
|
||||
|
||||
You can also compress or decompress files to the standard output by
|
||||
giving the <CODE>-c</CODE> flag. Multiple files may be compressed and
|
||||
decompressed like this. The resulting outputs are fed sequentially to
|
||||
stdout. Compression of multiple files in this manner generates a stream
|
||||
containing multiple compressed file representations. Such a stream
|
||||
can be decompressed correctly only by <CODE>bzip2</CODE> version 0.9.0 or
|
||||
later. Earlier versions of <CODE>bzip2</CODE> will stop after decompressing
|
||||
the first file in the stream.
|
||||
</P><P>
|
||||
|
||||
<CODE>bzcat</CODE> (or <CODE>bzip2 -dc</CODE>) decompresses all specified files to
|
||||
the standard output.
|
||||
</P><P>
|
||||
|
||||
<CODE>bzip2</CODE> will read arguments from the environment variables
|
||||
<CODE>BZIP2</CODE> and <CODE>BZIP</CODE>, in that order, and will process them
|
||||
before any arguments read from the command line. This gives a
|
||||
convenient way to supply default arguments.
|
||||
</P><P>
|
||||
|
||||
Compression is always performed, even if the compressed file is slightly
|
||||
larger than the original. Files of less than about one hundred bytes
|
||||
tend to get larger, since the compression mechanism has a constant
|
||||
overhead in the region of 50 bytes. Random data (including the output
|
||||
of most file compressors) is coded at about 8.05 bits per byte, giving
|
||||
an expansion of around 0.5%.
|
||||
</P><P>
|
||||
|
||||
As a self-check for your protection, <CODE>bzip2</CODE> uses 32-bit CRCs to
|
||||
make sure that the decompressed version of a file is identical to the
|
||||
original. This guards against corruption of the compressed data, and
|
||||
against undetected bugs in <CODE>bzip2</CODE> (hopefully very unlikely). The
|
||||
chance of data corruption going undetected is microscopic, about one
|
||||
chance in four billion for each file processed. Be aware, though, that
|
||||
the check occurs upon decompression, so it can only tell you that
|
||||
something is wrong. It can't help you recover the original uncompressed
|
||||
data. You can use <CODE>bzip2recover</CODE> to try to recover data from
|
||||
damaged files.
|
||||
</P><P>
|
||||
|
||||
Return values: 0 for a normal exit, 1 for environmental problems (file
|
||||
not found, invalid flags, I/O errors, etc.), 2 to indicate a corrupt
|
||||
compressed file, 3 for an internal consistency error (e.g., a bug) which
|
||||
caused <CODE>bzip2</CODE> to panic.
|
||||
</P><P>
|
||||
|
||||
<HR SIZE="6">
|
||||
<A NAME="SEC6"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC5"> < </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC7"> > </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H4> OPTIONS </H4>
|
||||
<!--docid::SEC6::-->
|
||||
<DL COMPACT>
|
||||
<DT><CODE>-c --stdout</CODE>
|
||||
<DD>Compress or decompress to standard output.
|
||||
<DT><CODE>-d --decompress</CODE>
|
||||
<DD>Force decompression. <CODE>bzip2</CODE>, <CODE>bunzip2</CODE> and <CODE>bzcat</CODE> are
|
||||
really the same program, and the decision about what actions to take is
|
||||
done on the basis of which name is used. This flag overrides that
|
||||
mechanism, and forces bzip2 to decompress.
|
||||
<DT><CODE>-z --compress</CODE>
|
||||
<DD>The complement to <CODE>-d</CODE>: forces compression, regardless of the
|
||||
invocation name.
|
||||
<DT><CODE>-t --test</CODE>
|
||||
<DD>Check integrity of the specified file(s), but don't decompress them.
|
||||
This really performs a trial decompression and throws away the result.
|
||||
<DT><CODE>-f --force</CODE>
|
||||
<DD>Force overwrite of output files. Normally, <CODE>bzip2</CODE> will not overwrite
|
||||
existing output files. Also forces <CODE>bzip2</CODE> to break hard links
|
||||
to files, which it otherwise wouldn't do.
|
||||
<P>
|
||||
|
||||
<CODE>bzip2</CODE> normally declines to decompress files which don't have the
|
||||
correct magic header bytes. If forced (<CODE>-f</CODE>), however, it will
|
||||
pass such files through unmodified. This is how GNU <CODE>gzip</CODE>
|
||||
behaves.
|
||||
<DT><CODE>-k --keep</CODE>
|
||||
<DD>Keep (don't delete) input files during compression
|
||||
or decompression.
|
||||
<DT><CODE>-s --small</CODE>
|
||||
<DD>Reduce memory usage, for compression, decompression and testing. Files
|
||||
are decompressed and tested using a modified algorithm which only
|
||||
requires 2.5 bytes per block byte. This means any file can be
|
||||
decompressed in 2300k of memory, albeit at about half the normal speed.
|
||||
<P>
|
||||
|
||||
During compression, <CODE>-s</CODE> selects a block size of 200k, which limits
|
||||
memory use to around the same figure, at the expense of your compression
|
||||
ratio. In short, if your machine is low on memory (8 megabytes or
|
||||
less), use -s for everything. See MEMORY MANAGEMENT below.
|
||||
<DT><CODE>-q --quiet</CODE>
|
||||
<DD>Suppress non-essential warning messages. Messages pertaining to
|
||||
I/O errors and other critical events will not be suppressed.
|
||||
<DT><CODE>-v --verbose</CODE>
|
||||
<DD>Verbose mode -- show the compression ratio for each file processed.
|
||||
Further <CODE>-v</CODE>'s increase the verbosity level, spewing out lots of
|
||||
information which is primarily of interest for diagnostic purposes.
|
||||
<DT><CODE>-L --license -V --version</CODE>
|
||||
<DD>Display the software version, license terms and conditions.
|
||||
<DT><CODE>-1 (or --fast) to -9 (or --best)</CODE>
|
||||
<DD>Set the block size to 100 k, 200 k .. 900 k when compressing. Has no
|
||||
effect when decompressing. See MEMORY MANAGEMENT below.
|
||||
The <CODE>--fast</CODE> and <CODE>--best</CODE> aliases are primarily for GNU
|
||||
<CODE>gzip</CODE> compatibility. In particular, <CODE>--fast</CODE> doesn't make
|
||||
things significantly faster. And <CODE>--best</CODE> merely selects the
|
||||
default behaviour.
|
||||
<DT><CODE>--</CODE>
|
||||
<DD>Treats all subsequent arguments as file names, even if they start
|
||||
with a dash. This is so you can handle files with names beginning
|
||||
with a dash, for example: <CODE>bzip2 -- -myfilename</CODE>.
|
||||
<DT><CODE>--repetitive-fast</CODE>
|
||||
<DD><DT><CODE>--repetitive-best</CODE>
|
||||
<DD>These flags are redundant in versions 0.9.5 and above. They provided
|
||||
some coarse control over the behaviour of the sorting algorithm in
|
||||
earlier versions, which was sometimes useful. 0.9.5 and above have an
|
||||
improved algorithm which renders these flags irrelevant.
|
||||
</DL>
|
||||
<P>
|
||||
|
||||
<HR SIZE="6">
|
||||
<A NAME="SEC7"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC6"> < </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC8"> > </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H4> MEMORY MANAGEMENT </H4>
|
||||
<!--docid::SEC7::-->
|
||||
<P>
|
||||
|
||||
<CODE>bzip2</CODE> compresses large files in blocks. The block size affects
|
||||
both the compression ratio achieved, and the amount of memory needed for
|
||||
compression and decompression. The flags <CODE>-1</CODE> through <CODE>-9</CODE>
|
||||
specify the block size to be 100,000 bytes through 900,000 bytes (the
|
||||
default) respectively. At decompression time, the block size used for
|
||||
compression is read from the header of the compressed file, and
|
||||
<CODE>bunzip2</CODE> then allocates itself just enough memory to decompress
|
||||
the file. Since block sizes are stored in compressed files, it follows
|
||||
that the flags <CODE>-1</CODE> to <CODE>-9</CODE> are irrelevant to and so ignored
|
||||
during decompression.
|
||||
</P><P>
|
||||
|
||||
Compression and decompression requirements, in bytes, can be estimated
|
||||
as:
|
||||
<TABLE><tr><td> </td><td class=example><pre> Compression: 400k + ( 8 x block size )
|
||||
|
||||
Decompression: 100k + ( 4 x block size ), or
|
||||
100k + ( 2.5 x block size )
|
||||
</pre></td></tr></table>Larger block sizes give rapidly diminishing marginal returns. Most of
|
||||
the compression comes from the first two or three hundred k of block
|
||||
size, a fact worth bearing in mind when using <CODE>bzip2</CODE> on small machines.
|
||||
It is also important to appreciate that the decompression memory
|
||||
requirement is set at compression time by the choice of block size.
|
||||
</P><P>
|
||||
|
||||
For files compressed with the default 900k block size, <CODE>bunzip2</CODE>
|
||||
will require about 3700 kbytes to decompress. To support decompression
|
||||
of any file on a 4 megabyte machine, <CODE>bunzip2</CODE> has an option to
|
||||
decompress using approximately half this amount of memory, about 2300
|
||||
kbytes. Decompression speed is also halved, so you should use this
|
||||
option only where necessary. The relevant flag is <CODE>-s</CODE>.
|
||||
</P><P>
|
||||
|
||||
In general, try to use the largest block size memory constraints allow,
|
||||
since that maximises the compression achieved. Compression and
|
||||
decompression speed are virtually unaffected by block size.
|
||||
</P><P>
|
||||
|
||||
Another significant point applies to files which fit in a single block
|
||||
-- that means most files you'd encounter using a large block size. The
|
||||
amount of real memory touched is proportional to the size of the file,
|
||||
since the file is smaller than a block. For example, compressing a file
|
||||
20,000 bytes long with the flag <CODE>-9</CODE> will cause the compressor to
|
||||
allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560
|
||||
kbytes of it. Similarly, the decompressor will allocate 3700k but only
|
||||
touch 100k + 20000 * 4 = 180 kbytes.
|
||||
</P><P>
|
||||
|
||||
Here is a table which summarises the maximum memory usage for different
|
||||
block sizes. Also recorded is the total compressed size for 14 files of
|
||||
the Calgary Text Compression Corpus totalling 3,141,622 bytes. This
|
||||
column gives some feel for how compression varies with block size.
|
||||
These figures tend to understate the advantage of larger block sizes for
|
||||
larger files, since the Corpus is dominated by smaller files.
|
||||
<TABLE><tr><td> </td><td class=example><pre> Compress Decompress Decompress Corpus
|
||||
Flag usage usage -s usage Size
|
||||
|
||||
-1 1200k 500k 350k 914704
|
||||
-2 2000k 900k 600k 877703
|
||||
-3 2800k 1300k 850k 860338
|
||||
-4 3600k 1700k 1100k 846899
|
||||
-5 4400k 2100k 1350k 845160
|
||||
-6 5200k 2500k 1600k 838626
|
||||
-7 6000k 2900k 1850k 834096
|
||||
-8 6800k 3300k 2100k 828642
|
||||
-9 7600k 3700k 2350k 828642
|
||||
</pre></td></tr></table></P><P>
|
||||
|
||||
<HR SIZE="6">
|
||||
<A NAME="SEC8"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC7"> < </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC9"> > </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H4> RECOVERING DATA FROM DAMAGED FILES </H4>
|
||||
<!--docid::SEC8::-->
|
||||
<P>
|
||||
|
||||
<CODE>bzip2</CODE> compresses files in blocks, usually 900kbytes long. Each
|
||||
block is handled independently. If a media or transmission error causes
|
||||
a multi-block <CODE>.bz2</CODE> file to become damaged, it may be possible to
|
||||
recover data from the undamaged blocks in the file.
|
||||
</P><P>
|
||||
|
||||
The compressed representation of each block is delimited by a 48-bit
|
||||
pattern, which makes it possible to find the block boundaries with
|
||||
reasonable certainty. Each block also carries its own 32-bit CRC, so
|
||||
damaged blocks can be distinguished from undamaged ones.
|
||||
</P><P>
|
||||
|
||||
<CODE>bzip2recover</CODE> is a simple program whose purpose is to search for
|
||||
blocks in <CODE>.bz2</CODE> files, and write each block out into its own
|
||||
<CODE>.bz2</CODE> file. You can then use <CODE>bzip2 -t</CODE> to test the
|
||||
integrity of the resulting files, and decompress those which are
|
||||
undamaged.
|
||||
</P><P>
|
||||
|
||||
<CODE>bzip2recover</CODE>
|
||||
takes a single argument, the name of the damaged file, and writes a
|
||||
number of files <CODE>rec00001file.bz2</CODE>, <CODE>rec00002file.bz2</CODE>, etc,
|
||||
containing the extracted blocks. The output filenames are designed so
|
||||
that the use of wildcards in subsequent processing -- for example,
|
||||
<CODE>bzip2 -dc rec*file.bz2 > recovered_data</CODE> -- processes the files in
|
||||
the correct order.
|
||||
</P><P>
|
||||
|
||||
<CODE>bzip2recover</CODE> should be of most use dealing with large <CODE>.bz2</CODE>
|
||||
files, as these will contain many blocks. It is clearly futile to use
|
||||
it on damaged single-block files, since a damaged block cannot be
|
||||
recovered. If you wish to minimise any potential data loss through
|
||||
media or transmission errors, you might consider compressing with a
|
||||
smaller block size.
|
||||
</P><P>
|
||||
|
||||
<HR SIZE="6">
|
||||
<A NAME="SEC9"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC8"> < </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC10"> > </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H4> PERFORMANCE NOTES </H4>
|
||||
<!--docid::SEC9::-->
|
||||
<P>
|
||||
|
||||
The sorting phase of compression gathers together similar strings in the
|
||||
file. Because of this, files containing very long runs of repeated
|
||||
symbols, like "aabaabaabaab ..." (repeated several hundred times) may
|
||||
compress more slowly than normal. Versions 0.9.5 and above fare much
|
||||
better than previous versions in this respect. The ratio between
|
||||
worst-case and average-case compression time is in the region of 10:1.
|
||||
For previous versions, this figure was more like 100:1. You can use the
|
||||
<CODE>-vvvv</CODE> option to monitor progress in great detail, if you want.
|
||||
</P><P>
|
||||
|
||||
Decompression speed is unaffected by these phenomena.
|
||||
</P><P>
|
||||
|
||||
<CODE>bzip2</CODE> usually allocates several megabytes of memory to operate
|
||||
in, and then charges all over it in a fairly random fashion. This means
|
||||
that performance, both for compressing and decompressing, is largely
|
||||
determined by the speed at which your machine can service cache misses.
|
||||
Because of this, small changes to the code to reduce the miss rate have
|
||||
been observed to give disproportionately large performance improvements.
|
||||
I imagine <CODE>bzip2</CODE> will perform best on machines with very large
|
||||
caches.
|
||||
</P><P>
|
||||
|
||||
<HR SIZE="6">
|
||||
<A NAME="SEC10"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC9"> < </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC11"> > </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H4> CAVEATS </H4>
|
||||
<!--docid::SEC10::-->
|
||||
<P>
|
||||
|
||||
I/O error messages are not as helpful as they could be. <CODE>bzip2</CODE>
|
||||
tries hard to detect I/O errors and exit cleanly, but the details of
|
||||
what the problem is sometimes seem rather misleading.
|
||||
</P><P>
|
||||
|
||||
This manual page pertains to version 1.0.2 of <CODE>bzip2</CODE>. Compressed
|
||||
data created by this version is entirely forwards and backwards
|
||||
compatible with the previous public releases, versions 0.1pl2, 0.9.0,
|
||||
0.9.5, 1.0.0 and 1.0.1, but with the following exception: 0.9.0 and
|
||||
above can correctly decompress multiple concatenated compressed files.
|
||||
0.1pl2 cannot do this; it will stop after decompressing just the first
|
||||
file in the stream.
|
||||
</P><P>
|
||||
|
||||
<CODE>bzip2recover</CODE> versions prior to this one, 1.0.2, used 32-bit
|
||||
integers to represent bit positions in compressed files, so they could not
|
||||
handle compressed files more than 512 megabytes long. Versions 1.0.2 and
|
||||
above use 64-bit ints on some platforms which support them (GNU
|
||||
supported targets, and Windows). To establish whether or not
|
||||
<CODE>bzip2recover</CODE> was built with such a limitation, run it without
|
||||
arguments. In any event you can build yourself an unlimited version if
|
||||
you can recompile it with <CODE>MaybeUInt64</CODE> set to be an unsigned
|
||||
64-bit integer.
|
||||
</P><P>
|
||||
|
||||
<HR SIZE="6">
|
||||
<A NAME="SEC11"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_2.html#SEC10"> < </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_3.html#SEC12"> > </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H4> AUTHOR </H4>
|
||||
<!--docid::SEC11::-->
|
||||
Julian Seward, <CODE>jseward@acm.org</CODE>.
|
||||
<P>
|
||||
|
||||
<CODE>http://sources.redhat.com/bzip2</CODE>
|
||||
</P><P>
|
||||
|
||||
The ideas embodied in <CODE>bzip2</CODE> are due to (at least) the following
|
||||
people: Michael Burrows and David Wheeler (for the block sorting
|
||||
transformation), David Wheeler (again, for the Huffman coder), Peter
|
||||
Fenwick (for the structured coding model in the original <CODE>bzip</CODE>,
|
||||
and many refinements), and Alistair Moffat, Radford Neal and Ian Witten
|
||||
(for the arithmetic coder in the original <CODE>bzip</CODE>). I am much
|
||||
indebted for their help, support and advice. See the manual in the
|
||||
source distribution for pointers to sources of documentation. Christian
|
||||
von Roques encouraged me to look for faster sorting algorithms, so as to
|
||||
speed up compression. Bela Lubkin encouraged me to improve the
|
||||
worst-case compression performance. The <CODE>bz*</CODE> scripts are derived
|
||||
from those of GNU <CODE>gzip</CODE>. Many people sent patches, helped with
|
||||
portability problems, lent machines, gave advice and were generally
|
||||
helpful.
|
||||
</P><P>
|
||||
|
||||
</BLOCKQUOTE>
|
||||
|
||||
<HR SIZE="6">
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<BR>
|
||||
<FONT SIZE="-1">
|
||||
This document was generated
|
||||
by <I>Julian Seward</I> on <I>January, 5 2002</I>
|
||||
using <A HREF="http://www.mathematik.uni-kl.de/~obachman/Texi2html
|
||||
"><I>texi2html</I></A>
|
||||
|
||||
</BODY>
|
||||
</HTML>
|
1855
manual_3.html
1855
manual_3.html
File diff suppressed because it is too large
Load Diff
530
manual_4.html
530
manual_4.html
|
@ -1,530 +0,0 @@
|
|||
<HTML>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<!-- Created on January, 5 2002 by texi2html 1.64 -->
|
||||
<!--
|
||||
Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author)
|
||||
Karl Berry <karl@freefriends.org>
|
||||
Olaf Bachmann <obachman@mathematik.uni-kl.de>
|
||||
and many others.
|
||||
Maintained by: Olaf Bachmann <obachman@mathematik.uni-kl.de>
|
||||
Send bugs and suggestions to <texi2html@mathematik.uni-kl.de>
|
||||
|
||||
-->
|
||||
<HEAD>
|
||||
<TITLE>Untitled Document: 4. Miscellanea</TITLE>
|
||||
|
||||
<META NAME="description" CONTENT="Untitled Document: 4. Miscellanea">
|
||||
<META NAME="keywords" CONTENT="Untitled Document: 4. Miscellanea">
|
||||
<META NAME="resource-type" CONTENT="document">
|
||||
<META NAME="distribution" CONTENT="global">
|
||||
<META NAME="Generator" CONTENT="texi2html 1.64">
|
||||
|
||||
</HEAD>
|
||||
|
||||
<BODY LANG="" BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#800080" ALINK="#FF0000">
|
||||
|
||||
<A NAME="SEC43"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_3.html#SEC42"> < </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC44"> > </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H1> 4. Miscellanea </H1>
|
||||
<!--docid::SEC43::-->
|
||||
<P>
|
||||
|
||||
These are just some random thoughts of mine. Your mileage may
|
||||
vary.
|
||||
</P><P>
|
||||
|
||||
<HR SIZE="6">
|
||||
<A NAME="SEC44"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC43"> < </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC45"> > </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H2> 4.1 Limitations of the compressed file format </H2>
|
||||
<!--docid::SEC44::-->
|
||||
<CODE>bzip2-1.0</CODE>, <CODE>0.9.5</CODE> and <CODE>0.9.0</CODE>
|
||||
use exactly the same file format as the previous
|
||||
version, <CODE>bzip2-0.1</CODE>. This decision was made in the interests of
|
||||
stability. Creating yet another incompatible compressed file format
|
||||
would create further confusion and disruption for users.
|
||||
<P>
|
||||
|
||||
Nevertheless, this is not a painless decision. Development
|
||||
work since the release of <CODE>bzip2-0.1</CODE> in August 1997
|
||||
has shown complexities in the file format which slow down
|
||||
decompression and, in retrospect, are unnecessary. These are:
|
||||
<UL>
|
||||
<LI>The run-length encoder, which is the first of the
|
||||
compression transformations, is entirely irrelevant.
|
||||
The original purpose was to protect the sorting algorithm
|
||||
from the very worst case input: a string of repeated
|
||||
symbols. But algorithm steps Q6a and Q6b in the original
|
||||
Burrows-Wheeler technical report (SRC-124) show how
|
||||
repeats can be handled without difficulty in block
|
||||
sorting.
|
||||
<LI>The randomisation mechanism doesn't really need to be
|
||||
there. Udi Manber and Gene Myers published a suffix
|
||||
array construction algorithm a few years back, which
|
||||
can be employed to sort any block, no matter how
|
||||
repetitive, in O(N log N) time. Subsequent work by
|
||||
Kunihiko Sadakane has produced a derivative O(N (log N)^2)
|
||||
algorithm which usually outperforms the Manber-Myers
|
||||
algorithm.
|
||||
<P>
|
||||
|
||||
I could have changed to Sadakane's algorithm, but I find
|
||||
it to be slower than <CODE>bzip2</CODE>'s existing algorithm for
|
||||
most inputs, and the randomisation mechanism protects
|
||||
adequately against bad cases. I didn't think it was
|
||||
a good tradeoff to make. Partly this is due to the fact
|
||||
that I was not flooded with email complaints about
|
||||
<CODE>bzip2-0.1</CODE>'s performance on repetitive data, so
|
||||
perhaps it isn't a problem for real inputs.
|
||||
</P><P>
|
||||
|
||||
Probably the best long-term solution,
|
||||
and the one I have incorporated into 0.9.5 and above,
|
||||
is to use the existing sorting
|
||||
algorithm initially, and fall back to an O(N (log N)^2)
|
||||
algorithm if the standard algorithm gets into difficulties.
|
||||
<LI>The compressed file format was never designed to be
|
||||
handled by a library, and I have had to jump through
|
||||
some hoops to produce an efficient implementation of
|
||||
decompression. It's a bit hairy. Try passing
|
||||
<CODE>decompress.c</CODE> through the C preprocessor
|
||||
and you'll see what I mean. Much of this complexity
|
||||
could have been avoided if the compressed size of
|
||||
each block of data was recorded in the data stream.
|
||||
<LI>An Adler-32 checksum, rather than a CRC32 checksum,
|
||||
would be faster to compute.
|
||||
</UL>
|
||||
It would be fair to say that the <CODE>bzip2</CODE> format was frozen
|
||||
before I properly and fully understood the performance
|
||||
consequences of doing so.
|
||||
<P>
|
||||
|
||||
Improvements which I was able to incorporate into
|
||||
0.9.0, despite using the same file format, are:
|
||||
<UL>
|
||||
<LI>Single array implementation of the inverse BWT. This
|
||||
significantly speeds up decompression, presumably
|
||||
because it reduces the number of cache misses.
|
||||
<LI>Faster inverse MTF transform for large MTF values. The
|
||||
new implementation is based on the notion of sliding blocks
|
||||
of values.
|
||||
<LI><CODE>bzip2-0.9.0</CODE> now reads and writes files with <CODE>fread</CODE>
|
||||
and <CODE>fwrite</CODE>; version 0.1 used <CODE>putc</CODE> and <CODE>getc</CODE>.
|
||||
Duh! Well, you live and learn.
|
||||
<P>
|
||||
|
||||
</UL>
|
||||
Further ahead, it would be nice
|
||||
to be able to do random access into files. This will
|
||||
require some careful design of compressed file formats.
|
||||
<P>
|
||||
|
||||
<HR SIZE="6">
|
||||
<A NAME="SEC45"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC44"> < </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC46"> > </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H2> 4.2 Portability issues </H2>
|
||||
<!--docid::SEC45::-->
|
||||
After some consideration, I have decided not to use
|
||||
GNU <CODE>autoconf</CODE> to configure 0.9.5 or 1.0.
|
||||
<P>
|
||||
|
||||
<CODE>autoconf</CODE>, admirable and wonderful though it is,
|
||||
mainly assists with portability problems between Unix-like
|
||||
platforms. But <CODE>bzip2</CODE> doesn't have much in the way
|
||||
of portability problems on Unix; most of the difficulties appear
|
||||
when porting to the Mac, or to Microsoft's operating systems.
|
||||
<CODE>autoconf</CODE> doesn't help in those cases, and brings in a
|
||||
whole load of new complexity.
|
||||
</P><P>
|
||||
|
||||
Most people should be able to compile the library and program
|
||||
under Unix straight out-of-the-box, so to speak, especially
|
||||
if you have a version of GNU C available.
|
||||
</P><P>
|
||||
|
||||
There are a couple of <CODE>__inline__</CODE> directives in the code. GNU C
|
||||
(<CODE>gcc</CODE>) should be able to handle them. If you're not using
|
||||
GNU C, your C compiler shouldn't see them at all.
|
||||
If your compiler does, for some reason, see them and doesn't
|
||||
like them, just <CODE>#define</CODE> <CODE>__inline__</CODE> to be <CODE>/* */</CODE>. One
|
||||
easy way to do this is to compile with the flag <CODE>-D__inline__=</CODE>,
|
||||
which should be understood by most Unix compilers.
|
||||
</P><P>
|
||||
|
||||
If you still have difficulties, try compiling with the macro
|
||||
<CODE>BZ_STRICT_ANSI</CODE> defined. This should enable you to build the
|
||||
library in a strictly ANSI compliant environment. Building the program
|
||||
itself like this is dangerous and not supported, since you remove
|
||||
<CODE>bzip2</CODE>'s checks against compressing directories, symbolic links,
|
||||
devices, and other not-really-a-file entities. This could cause
|
||||
filesystem corruption!
|
||||
</P><P>
|
||||
|
||||
One other thing: if you create a <CODE>bzip2</CODE> binary for public
|
||||
distribution, please try and link it statically (<CODE>gcc -static</CODE>).  This
|
||||
avoids all sorts of library-version issues that others may encounter
|
||||
later on.
|
||||
</P><P>
|
||||
|
||||
If you build <CODE>bzip2</CODE> on Win32, you must set <CODE>BZ_UNIX</CODE> to 0 and
|
||||
<CODE>BZ_LCCWIN32</CODE> to 1, in the file <CODE>bzip2.c</CODE>, before compiling.
|
||||
Otherwise the resulting binary won't work correctly.
|
||||
</P><P>
|
||||
|
||||
<HR SIZE="6">
|
||||
<A NAME="SEC46"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC45"> < </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC47"> > </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H2> 4.3 Reporting bugs </H2>
|
||||
<!--docid::SEC46::-->
|
||||
I tried pretty hard to make sure <CODE>bzip2</CODE> is
|
||||
bug free, both by design and by testing. Hopefully
|
||||
you'll never need to read this section for real.
|
||||
<P>
|
||||
|
||||
Nevertheless, if <CODE>bzip2</CODE> dies with a segmentation
|
||||
fault, a bus error or an internal assertion failure, it
|
||||
will ask you to email me a bug report. Experience with
|
||||
version 0.1 shows that almost all these problems can
|
||||
be traced to either compiler bugs or hardware problems.
|
||||
<UL>
|
||||
<LI>
|
||||
Recompile the program with no optimisation, and see if it
|
||||
works. And/or try a different compiler.
|
||||
I heard all sorts of stories about various flavours
|
||||
of GNU C (and other compilers) generating bad code for
|
||||
<CODE>bzip2</CODE>, and I've run across two such examples myself.
|
||||
<P>
|
||||
|
||||
2.7.X versions of GNU C are known to generate bad code from
|
||||
time to time, at high optimisation levels.
|
||||
If you get problems, try using the flags
|
||||
<CODE>-O2</CODE> <CODE>-fomit-frame-pointer</CODE> <CODE>-fno-strength-reduce</CODE>.
|
||||
You should specifically <EM>not</EM> use <CODE>-funroll-loops</CODE>.
|
||||
</P><P>
|
||||
|
||||
You may notice that the Makefile runs six tests as part of
|
||||
the build process. If the program passes all of these, it's
|
||||
a pretty good (but not 100%) indication that the compiler has
|
||||
done its job correctly.
|
||||
<LI>
|
||||
If <CODE>bzip2</CODE> crashes randomly, and the crashes are not
|
||||
repeatable, you may have a flaky memory subsystem. <CODE>bzip2</CODE>
|
||||
really hammers your memory hierarchy, and if it's a bit marginal,
|
||||
you may get these problems. Ditto if your disk or I/O subsystem
|
||||
is slowly failing. Yup, this really does happen.
|
||||
<P>
|
||||
|
||||
Try using a different machine of the same type, and see if
|
||||
you can repeat the problem.
|
||||
<LI>This isn't really a bug, but ... If <CODE>bzip2</CODE> tells
|
||||
you your file is corrupted on decompression, and you
|
||||
obtained the file via FTP, there is a possibility that you
|
||||
forgot to tell FTP to do a binary mode transfer. That absolutely
|
||||
will cause the file to be non-decompressible. You'll have to transfer
|
||||
it again.
|
||||
</UL>
|
||||
<P>
|
||||
|
||||
If you've incorporated <CODE>libbzip2</CODE> into your own program
|
||||
and are getting problems, please, please, please, check that the
|
||||
parameters you are passing in calls to the library, are
|
||||
correct, and in accordance with what the documentation says
|
||||
is allowable. I have tried to make the library robust against
|
||||
such problems, but I'm sure I haven't succeeded.
|
||||
</P><P>
|
||||
|
||||
Finally, if the above comments don't help, you'll have to send
|
||||
me a bug report. Now, it's just amazing how many people will
|
||||
send me a bug report saying something like
|
||||
<TABLE><tr><td> </td><td class=display><pre style="font-family: serif"> bzip2 crashed with segmentation fault on my machine
|
||||
</pre></td></tr></table>and absolutely nothing else.  Needless to say, such a report
|
||||
is <EM>totally, utterly, completely and comprehensively 100% useless;
|
||||
a waste of your time, my time, and net bandwidth</EM>.
|
||||
With no details at all, there's no way I can possibly begin
|
||||
to figure out what the problem is.
|
||||
</P><P>
|
||||
|
||||
The rules of the game are: facts, facts, facts. Don't omit
|
||||
them because "oh, they won't be relevant". At the bare
|
||||
minimum:
|
||||
<TABLE><tr><td> </td><td class=display><pre style="font-family: serif"> Machine type. Operating system version.
|
||||
Exact version of <CODE>bzip2</CODE> (do <CODE>bzip2 -V</CODE>).
|
||||
Exact version of the compiler used.
|
||||
Flags passed to the compiler.
|
||||
</pre></td></tr></table>However, the most important single thing that will help me is
|
||||
the file that you were trying to compress or decompress at the
|
||||
time the problem happened. Without that, my ability to do anything
|
||||
more than speculate about the cause, is limited.
|
||||
</P><P>
|
||||
|
||||
Please remember that I connect to the Internet with a modem, so
|
||||
you should contact me before mailing me huge files.
|
||||
</P><P>
|
||||
|
||||
<HR SIZE="6">
|
||||
<A NAME="SEC47"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC46"> < </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC48"> > </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H2> 4.4 Did you get the right package? </H2>
|
||||
<!--docid::SEC47::-->
|
||||
<P>
|
||||
|
||||
<CODE>bzip2</CODE> is a resource hog. It soaks up large amounts of CPU cycles
|
||||
and memory. Also, it gives very large latencies. In the worst case, you
|
||||
can feed many megabytes of uncompressed data into the library before
|
||||
getting any compressed output, so this probably rules out applications
|
||||
requiring interactive behaviour.
|
||||
</P><P>
|
||||
|
||||
These aren't faults of my implementation, I hope, but more
|
||||
an intrinsic property of the Burrows-Wheeler transform (unfortunately).
|
||||
Maybe this isn't what you want.
|
||||
</P><P>
|
||||
|
||||
If you want a compressor and/or library which is faster, uses less
|
||||
memory but gets pretty good compression, and has minimal latency,
|
||||
consider Jean-loup
|
||||
Gailly's and Mark Adler's work, <CODE>zlib-1.1.3</CODE> and
|
||||
<CODE>gzip-1.2.4</CODE>. Look for them at
|
||||
</P><P>
|
||||
|
||||
<CODE>http://www.zlib.org</CODE> and
|
||||
<CODE>http://www.gzip.org</CODE> respectively.
|
||||
</P><P>
|
||||
|
||||
For something faster and lighter still, you might try Markus F X J
|
||||
Oberhumer's <CODE>LZO</CODE> real-time compression/decompression library, at
|
||||
<BR> <CODE>http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html</CODE>.
|
||||
</P><P>
|
||||
|
||||
If you want to use the <CODE>bzip2</CODE> algorithms to compress small blocks
|
||||
of data, 64k bytes or smaller, for example on an on-the-fly disk
|
||||
compressor, you'd be well advised not to use this library. Instead,
|
||||
I've made a special library tuned for that kind of use. It's part of
|
||||
<CODE>e2compr-0.40</CODE>, an on-the-fly disk compressor for the Linux
|
||||
<CODE>ext2</CODE> filesystem. Look at
|
||||
<CODE>http://www.netspace.net.au/~reiter/e2compr</CODE>.
|
||||
</P><P>
|
||||
|
||||
<HR SIZE="6">
|
||||
<A NAME="SEC48"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC47"> < </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC49"> > </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H2> 4.5 Testing </H2>
|
||||
<!--docid::SEC48::-->
|
||||
<P>
|
||||
|
||||
A record of the tests I've done.
|
||||
</P><P>
|
||||
|
||||
First, some data sets:
|
||||
<UL>
|
||||
<LI>B: a directory containing 6001 files, one for every length in the
|
||||
range 0 to 6000 bytes. The files contain random lowercase
|
||||
letters. 18.7 megabytes.
|
||||
<LI>H: my home directory tree. Documents, source code, mail files,
|
||||
compressed data. H contains B, and also a directory of
|
||||
files designed as boundary cases for the sorting; mostly very
|
||||
repetitive, nasty files. 565 megabytes.
|
||||
<LI>A: directory tree holding various applications built from source:
|
||||
<CODE>egcs</CODE>, <CODE>gcc-2.8.1</CODE>, KDE, GTK, Octave, etc.
|
||||
2200 megabytes.
|
||||
</UL>
|
||||
The tests conducted are as follows. Each test means compressing
|
||||
(a copy of) each file in the data set, decompressing it and
|
||||
comparing it against the original.
|
||||
<P>
|
||||
|
||||
First, a bunch of tests with block sizes and internal buffer
|
||||
sizes set very small,
|
||||
to detect any problems with the
|
||||
blocking and buffering mechanisms.
|
||||
This required modifying the source code so as to try to
|
||||
break it.
|
||||
<OL>
|
||||
<LI>Data set H, with
|
||||
buffer size of 1 byte, and block size of 23 bytes.
|
||||
<LI>Data set B, buffer sizes 1 byte, block size 1 byte.
|
||||
<LI>As (2) but small-mode decompression.
|
||||
<LI>As (2) with block size 2 bytes.
|
||||
<LI>As (2) with block size 3 bytes.
|
||||
<LI>As (2) with block size 4 bytes.
|
||||
<LI>As (2) with block size 5 bytes.
|
||||
<LI>As (2) with block size 6 bytes and small-mode decompression.
|
||||
<LI>H with buffer size of 1 byte, but normal block
|
||||
size (up to 900000 bytes).
|
||||
</OL>
|
||||
Then some tests with unmodified source code.
|
||||
<OL>
|
||||
<LI>H, all settings normal.
|
||||
<LI>As (1), with small-mode decompress.
|
||||
<LI>H, compress with flag <CODE>-1</CODE>.
|
||||
<LI>H, compress with flag <CODE>-s</CODE>, decompress with flag <CODE>-s</CODE>.
|
||||
<LI>Forwards compatibility: H, <CODE>bzip2-0.1pl2</CODE> compressing,
|
||||
<CODE>bzip2-0.9.5</CODE> decompressing, all settings normal.
|
||||
<LI>Backwards compatibility: H, <CODE>bzip2-0.9.5</CODE> compressing,
|
||||
<CODE>bzip2-0.1pl2</CODE> decompressing, all settings normal.
|
||||
<LI>Bigger tests: A, all settings normal.
|
||||
<LI>As (7), using the fallback (Sadakane-like) sorting algorithm.
|
||||
<LI>As (8), compress with flag <CODE>-1</CODE>, decompress with flag
|
||||
<CODE>-s</CODE>.
|
||||
<LI>H, using the fallback sorting algorithm.
|
||||
<LI>Forwards compatibility: A, <CODE>bzip2-0.1pl2</CODE> compressing,
|
||||
<CODE>bzip2-0.9.5</CODE> decompressing, all settings normal.
|
||||
<LI>Backwards compatibility: A, <CODE>bzip2-0.9.5</CODE> compressing,
|
||||
<CODE>bzip2-0.1pl2</CODE> decompressing, all settings normal.
|
||||
<LI>Misc test: about 400 megabytes of <CODE>.tar</CODE> files with
|
||||
<CODE>bzip2</CODE> compiled with Checker (a memory access error
|
||||
detector, like Purify).
|
||||
<LI>Misc tests to make sure it builds and runs ok on non-Linux/x86
|
||||
platforms.
|
||||
</OL>
|
||||
These tests were conducted on a 225 MHz IDT WinChip machine, running
|
||||
Linux 2.0.36. They represent nearly a week of continuous computation.
|
||||
All tests completed successfully.
|
||||
<P>
|
||||
|
||||
<HR SIZE="6">
|
||||
<A NAME="SEC49"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_4.html#SEC48"> < </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ > ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top"> Up </A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H2> 4.6 Further reading </H2>
|
||||
<!--docid::SEC49::-->
|
||||
<CODE>bzip2</CODE> is not research work, in the sense that it doesn't present
|
||||
any new ideas. Rather, it's an engineering exercise based on existing
|
||||
ideas.
|
||||
<P>
|
||||
|
||||
Four documents describe essentially all the ideas behind <CODE>bzip2</CODE>:
|
||||
<TABLE><tr><td> </td><td class=example><pre>Michael Burrows and D. J. Wheeler:
|
||||
"A block-sorting lossless data compression algorithm"
|
||||
10th May 1994.
|
||||
Digital SRC Research Report 124.
|
||||
ftp://ftp.digital.com/pub/DEC/SRC/research-reports/SRC-124.ps.gz
|
||||
If you have trouble finding it, try searching at the
|
||||
New Zealand Digital Library, http://www.nzdl.org.
|
||||
|
||||
Daniel S. Hirschberg and Debra A. LeLewer
|
||||
"Efficient Decoding of Prefix Codes"
|
||||
Communications of the ACM, April 1990, Vol 33, Number 4.
|
||||
You might be able to get an electronic copy of this
|
||||
from the ACM Digital Library.
|
||||
|
||||
David J. Wheeler
|
||||
Program bred3.c and accompanying document bred3.ps.
|
||||
This contains the idea behind the multi-table Huffman
|
||||
coding scheme.
|
||||
ftp://ftp.cl.cam.ac.uk/users/djw3/
|
||||
|
||||
Jon L. Bentley and Robert Sedgewick
|
||||
"Fast Algorithms for Sorting and Searching Strings"
|
||||
Available from Sedgewick's web page,
|
||||
www.cs.princeton.edu/~rs
|
||||
</pre></td></tr></table>The following paper gives valuable additional insights into the
|
||||
algorithm, but is not immediately the basis of any code
|
||||
used in bzip2.
|
||||
<TABLE><tr><td> </td><td class=example><pre>Peter Fenwick:
|
||||
Block Sorting Text Compression
|
||||
Proceedings of the 19th Australasian Computer Science Conference,
|
||||
Melbourne, Australia. Jan 31 - Feb 2, 1996.
|
||||
ftp://ftp.cs.auckland.ac.nz/pub/peter-f/ACSC96paper.ps
|
||||
</pre></td></tr></table>Kunihiko Sadakane's sorting algorithm, mentioned above,
|
||||
is available from:
|
||||
<TABLE><tr><td> </td><td class=example><pre>http://naomi.is.s.u-tokyo.ac.jp/~sada/papers/Sada98b.ps.gz
|
||||
</pre></td></tr></table>The Manber-Myers suffix array construction
|
||||
algorithm is described in a paper
|
||||
available from:
|
||||
<TABLE><tr><td> </td><td class=example><pre>http://www.cs.arizona.edu/people/gene/PAPERS/suffix.ps
|
||||
</pre></td></tr></table>Finally, the following paper documents some recent investigations
|
||||
I made into the performance of sorting algorithms:
|
||||
<TABLE><tr><td> </td><td class=example><pre>Julian Seward:
|
||||
On the Performance of BWT Sorting Algorithms
|
||||
Proceedings of the IEEE Data Compression Conference 2000
|
||||
Snowbird, Utah. 28-30 March 2000.
|
||||
</pre></td></tr></table></P><P>
|
||||
|
||||
<HR SIZE="6">
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[ << ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[ >> ]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT"> <TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<BR>
|
||||
<FONT SIZE="-1">
|
||||
This document was generated
|
||||
by <I>Julian Seward</I> on <I>January, 5 2002</I>
|
||||
using <A HREF="http://www.mathematik.uni-kl.de/~obachman/Texi2html
|
||||
"><I>texi2html</I></A>
|
||||
|
||||
</BODY>
|
||||
</HTML>
|
201
manual_abt.html
201
manual_abt.html
|
@ -1,201 +0,0 @@
|
|||
<HTML>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<!-- Created on January, 5 2002 by texi2html 1.64 -->
|
||||
<!--
|
||||
Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author)
|
||||
Karl Berry <karl@freefriends.org>
|
||||
Olaf Bachmann <obachman@mathematik.uni-kl.de>
|
||||
and many others.
|
||||
Maintained by: Olaf Bachmann <obachman@mathematik.uni-kl.de>
|
||||
Send bugs and suggestions to <texi2html@mathematik.uni-kl.de>
|
||||
|
||||
-->
|
||||
<HEAD>
|
||||
<TITLE>Untitled Document: About this document</TITLE>
|
||||
|
||||
<META NAME="description" CONTENT="Untitled Document: About this document">
|
||||
<META NAME="keywords" CONTENT="Untitled Document: About this document">
|
||||
<META NAME="resource-type" CONTENT="document">
|
||||
<META NAME="distribution" CONTENT="global">
|
||||
<META NAME="Generator" CONTENT="texi2html 1.64">
|
||||
|
||||
</HEAD>
|
||||
|
||||
<BODY LANG="" BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#800080" ALINK="#FF0000">
|
||||
|
||||
<A NAME="SEC_About"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H1>About this document</H1>
|
||||
This document was generated by <I>Julian Seward</I> on <I>January, 5 2002</I>
|
||||
using <A HREF="http://www.mathematik.uni-kl.de/~obachman/Texi2html
|
||||
"><I>texi2html</I></A>
|
||||
<P></P>
|
||||
The buttons in the navigation panels have the following meaning:
|
||||
<P></P>
|
||||
<table border = "1">
|
||||
<TR>
|
||||
<TH> Button </TH>
|
||||
<TH> Name </TH>
|
||||
<TH> Go to </TH>
|
||||
<TH> From 1.2.3 go to</TH>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD ALIGN="CENTER">
|
||||
[ < ] </TD>
|
||||
<TD ALIGN="CENTER">
|
||||
Back
|
||||
</TD>
|
||||
<TD>
|
||||
previous section in reading order
|
||||
</TD>
|
||||
<TD>
|
||||
1.2.2
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD ALIGN="CENTER">
|
||||
[ > ] </TD>
|
||||
<TD ALIGN="CENTER">
|
||||
Forward
|
||||
</TD>
|
||||
<TD>
|
||||
next section in reading order
|
||||
</TD>
|
||||
<TD>
|
||||
1.2.4
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD ALIGN="CENTER">
|
||||
[ << ] </TD>
|
||||
<TD ALIGN="CENTER">
|
||||
FastBack
|
||||
</TD>
|
||||
<TD>
|
||||
previous or up-and-previous section
|
||||
</TD>
|
||||
<TD>
|
||||
1.1
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD ALIGN="CENTER">
|
||||
[ Up ] </TD>
|
||||
<TD ALIGN="CENTER">
|
||||
Up
|
||||
</TD>
|
||||
<TD>
|
||||
up section
|
||||
</TD>
|
||||
<TD>
|
||||
1.2
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD ALIGN="CENTER">
|
||||
[ >> ] </TD>
|
||||
<TD ALIGN="CENTER">
|
||||
FastForward
|
||||
</TD>
|
||||
<TD>
|
||||
next or up-and-next section
|
||||
</TD>
|
||||
<TD>
|
||||
1.3
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD ALIGN="CENTER">
|
||||
[Top] </TD>
|
||||
<TD ALIGN="CENTER">
|
||||
Top
|
||||
</TD>
|
||||
<TD>
|
||||
cover (top) of document
|
||||
</TD>
|
||||
<TD>
|
||||
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD ALIGN="CENTER">
|
||||
[Contents] </TD>
|
||||
<TD ALIGN="CENTER">
|
||||
Contents
|
||||
</TD>
|
||||
<TD>
|
||||
table of contents
|
||||
</TD>
|
||||
<TD>
|
||||
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD ALIGN="CENTER">
|
||||
[Index] </TD>
|
||||
<TD ALIGN="CENTER">
|
||||
Index
|
||||
</TD>
|
||||
<TD>
|
||||
concept index
|
||||
</TD>
|
||||
<TD>
|
||||
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD ALIGN="CENTER">
|
||||
[ ? ] </TD>
|
||||
<TD ALIGN="CENTER">
|
||||
About
|
||||
</TD>
|
||||
<TD>
|
||||
this page
|
||||
</TD>
|
||||
<TD>
|
||||
|
||||
</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
<P></P>
|
||||
where the <STRONG> Example </STRONG> assumes that the current position
|
||||
is at <STRONG> Subsubsection One-Two-Three </STRONG> of a document of
|
||||
the following structure:
|
||||
<UL>
|
||||
<LI> 1. Section One </LI>
|
||||
<UL>
|
||||
<LI>1.1 Subsection One-One</LI>
|
||||
<UL>
|
||||
<LI> ... </LI>
|
||||
</UL>
|
||||
<LI>1.2 Subsection One-Two</LI>
|
||||
<UL>
|
||||
<LI>1.2.1 Subsubsection One-Two-One
|
||||
</LI><LI>1.2.2 Subsubsection One-Two-Two
|
||||
</LI><LI>1.2.3 Subsubsection One-Two-Three <STRONG>
|
||||
<== Current Position </STRONG>
|
||||
</LI><LI>1.2.4 Subsubsection One-Two-Four
|
||||
</LI></UL>
|
||||
<LI>1.3 Subsection One-Three</LI>
|
||||
<UL>
|
||||
<LI> ... </LI>
|
||||
</UL>
|
||||
<LI>1.4 Subsection One-Four</LI>
|
||||
</UL>
|
||||
</UL>
|
||||
|
||||
<HR SIZE=1>
|
||||
<BR>
|
||||
<FONT SIZE="-1">
|
||||
This document was generated
|
||||
by <I>Julian Seward</I> on <I>January, 5 2002</I>
|
||||
using <A HREF="http://www.mathematik.uni-kl.de/~obachman/Texi2html
|
||||
"><I>texi2html</I></A>
|
||||
|
||||
</BODY>
|
||||
</HTML>
|
|
@ -1,54 +0,0 @@
|
|||
<HTML>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<!-- Created on January, 5 2002 by texi2html 1.64 -->
|
||||
<!--
|
||||
Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author)
|
||||
Karl Berry <karl@freefriends.org>
|
||||
Olaf Bachmann <obachman@mathematik.uni-kl.de>
|
||||
and many others.
|
||||
Maintained by: Olaf Bachmann <obachman@mathematik.uni-kl.de>
|
||||
Send bugs and suggestions to <texi2html@mathematik.uni-kl.de>
|
||||
|
||||
-->
|
||||
<HEAD>
|
||||
<TITLE>Untitled Document: Short Table of Contents</TITLE>
|
||||
|
||||
<META NAME="description" CONTENT="Untitled Document: Short Table of Contents">
|
||||
<META NAME="keywords" CONTENT="Untitled Document: Short Table of Contents">
|
||||
<META NAME="resource-type" CONTENT="document">
|
||||
<META NAME="distribution" CONTENT="global">
|
||||
<META NAME="Generator" CONTENT="texi2html 1.64">
|
||||
|
||||
</HEAD>
|
||||
|
||||
<BODY LANG="" BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#800080" ALINK="#FF0000">
|
||||
|
||||
<A NAME="SEC_OVERVIEW"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H1>Short Table of Contents</H1>
|
||||
<BLOCKQUOTE>
|
||||
<A NAME="TOC1" HREF="manual_1.html#SEC1">1. Introduction</A>
|
||||
<BR>
|
||||
<A NAME="TOC2" HREF="manual_2.html#SEC2">2. How to use <CODE>bzip2</CODE></A>
|
||||
<BR>
|
||||
<A NAME="TOC12" HREF="manual_3.html#SEC12">3. Programming with <CODE>libbzip2</CODE></A>
|
||||
<BR>
|
||||
<A NAME="TOC43" HREF="manual_4.html#SEC43">4. Miscellanea</A>
|
||||
<BR>
|
||||
|
||||
</BLOCKQUOTE>
|
||||
<HR SIZE=1>
|
||||
<BR>
|
||||
<FONT SIZE="-1">
|
||||
This document was generated
|
||||
by <I>Julian Seward</I> on <I>January, 5 2002</I>
|
||||
using <A HREF="http://www.mathematik.uni-kl.de/~obachman/Texi2html
|
||||
"><I>texi2html</I></A>
|
||||
|
||||
</BODY>
|
||||
</HTML>
|
163
manual_toc.html
163
manual_toc.html
|
@ -1,163 +0,0 @@
|
|||
<HTML>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<!-- Created on January, 5 2002 by texi2html 1.64 -->
|
||||
<!--
|
||||
Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author)
|
||||
Karl Berry <karl@freefriends.org>
|
||||
Olaf Bachmann <obachman@mathematik.uni-kl.de>
|
||||
and many others.
|
||||
Maintained by: Olaf Bachmann <obachman@mathematik.uni-kl.de>
|
||||
Send bugs and suggestions to <texi2html@mathematik.uni-kl.de>
|
||||
|
||||
-->
|
||||
<HEAD>
|
||||
<TITLE>Untitled Document: Table of Contents</TITLE>
|
||||
|
||||
<META NAME="description" CONTENT="Untitled Document: Table of Contents">
|
||||
<META NAME="keywords" CONTENT="Untitled Document: Table of Contents">
|
||||
<META NAME="resource-type" CONTENT="document">
|
||||
<META NAME="distribution" CONTENT="global">
|
||||
<META NAME="Generator" CONTENT="texi2html 1.64">
|
||||
|
||||
</HEAD>
|
||||
|
||||
<BODY LANG="" BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#800080" ALINK="#FF0000">
|
||||
|
||||
<A NAME="SEC_Contents"></A>
|
||||
<TABLE CELLPADDING=1 CELLSPACING=1 BORDER=0>
|
||||
<TR><TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual.html#SEC_Top">Top</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_toc.html#SEC_Contents">Contents</A>]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[Index]</TD>
|
||||
<TD VALIGN="MIDDLE" ALIGN="LEFT">[<A HREF="manual_abt.html#SEC_About"> ? </A>]</TD>
|
||||
</TR></TABLE>
|
||||
<H1>Table of Contents</H1>
|
||||
<UL>
|
||||
<A NAME="TOC1" HREF="manual_1.html#SEC1">1. Introduction</A>
|
||||
<BR>
|
||||
<A NAME="TOC2" HREF="manual_2.html#SEC2">2. How to use <CODE>bzip2</CODE></A>
|
||||
<BR>
|
||||
<UL>
|
||||
<UL>
|
||||
<UL>
|
||||
<A NAME="TOC3" HREF="manual_2.html#SEC3">NAME</A>
|
||||
<BR>
|
||||
<A NAME="TOC4" HREF="manual_2.html#SEC4">SYNOPSIS</A>
|
||||
<BR>
|
||||
<A NAME="TOC5" HREF="manual_2.html#SEC5">DESCRIPTION</A>
|
||||
<BR>
|
||||
<A NAME="TOC6" HREF="manual_2.html#SEC6">OPTIONS</A>
|
||||
<BR>
|
||||
<A NAME="TOC7" HREF="manual_2.html#SEC7">MEMORY MANAGEMENT</A>
|
||||
<BR>
|
||||
<A NAME="TOC8" HREF="manual_2.html#SEC8">RECOVERING DATA FROM DAMAGED FILES</A>
|
||||
<BR>
|
||||
<A NAME="TOC9" HREF="manual_2.html#SEC9">PERFORMANCE NOTES</A>
|
||||
<BR>
|
||||
<A NAME="TOC10" HREF="manual_2.html#SEC10">CAVEATS</A>
|
||||
<BR>
|
||||
<A NAME="TOC11" HREF="manual_2.html#SEC11">AUTHOR</A>
|
||||
<BR>
|
||||
</UL>
|
||||
</UL>
|
||||
</UL>
|
||||
<A NAME="TOC12" HREF="manual_3.html#SEC12">3. Programming with <CODE>libbzip2</CODE></A>
|
||||
<BR>
|
||||
<UL>
|
||||
<A NAME="TOC13" HREF="manual_3.html#SEC13">3.1 Top-level structure</A>
|
||||
<BR>
|
||||
<UL>
|
||||
<A NAME="TOC14" HREF="manual_3.html#SEC14">3.1.1 Low-level summary</A>
|
||||
<BR>
|
||||
<A NAME="TOC15" HREF="manual_3.html#SEC15">3.1.2 High-level summary</A>
|
||||
<BR>
|
||||
<A NAME="TOC16" HREF="manual_3.html#SEC16">3.1.3 Utility functions summary</A>
|
||||
<BR>
|
||||
</UL>
|
||||
<A NAME="TOC17" HREF="manual_3.html#SEC17">3.2 Error handling</A>
|
||||
<BR>
|
||||
<A NAME="TOC18" HREF="manual_3.html#SEC18">3.3 Low-level interface</A>
|
||||
<BR>
|
||||
<UL>
|
||||
<A NAME="TOC19" HREF="manual_3.html#SEC19">3.3.1 <CODE>BZ2_bzCompressInit</CODE></A>
|
||||
<BR>
|
||||
<A NAME="TOC20" HREF="manual_3.html#SEC20">3.3.2 <CODE>BZ2_bzCompress</CODE></A>
|
||||
<BR>
|
||||
<A NAME="TOC21" HREF="manual_3.html#SEC21">3.3.3 <CODE>BZ2_bzCompressEnd</CODE></A>
|
||||
<BR>
|
||||
<A NAME="TOC22" HREF="manual_3.html#SEC22">3.3.4 <CODE>BZ2_bzDecompressInit</CODE></A>
|
||||
<BR>
|
||||
<A NAME="TOC23" HREF="manual_3.html#SEC23">3.3.5 <CODE>BZ2_bzDecompress</CODE></A>
|
||||
<BR>
|
||||
<A NAME="TOC24" HREF="manual_3.html#SEC24">3.3.6 <CODE>BZ2_bzDecompressEnd</CODE></A>
|
||||
<BR>
|
||||
</UL>
|
||||
<A NAME="TOC25" HREF="manual_3.html#SEC25">3.4 High-level interface</A>
|
||||
<BR>
|
||||
<UL>
|
||||
<A NAME="TOC26" HREF="manual_3.html#SEC26">3.4.1 <CODE>BZ2_bzReadOpen</CODE></A>
|
||||
<BR>
|
||||
<A NAME="TOC27" HREF="manual_3.html#SEC27">3.4.2 <CODE>BZ2_bzRead</CODE></A>
|
||||
<BR>
|
||||
<A NAME="TOC28" HREF="manual_3.html#SEC28">3.4.3 <CODE>BZ2_bzReadGetUnused</CODE></A>
|
||||
<BR>
|
||||
<A NAME="TOC29" HREF="manual_3.html#SEC29">3.4.4 <CODE>BZ2_bzReadClose</CODE></A>
|
||||
<BR>
|
||||
<A NAME="TOC30" HREF="manual_3.html#SEC30">3.4.5 <CODE>BZ2_bzWriteOpen</CODE></A>
|
||||
<BR>
|
||||
<A NAME="TOC31" HREF="manual_3.html#SEC31">3.4.6 <CODE>BZ2_bzWrite</CODE></A>
|
||||
<BR>
|
||||
<A NAME="TOC32" HREF="manual_3.html#SEC32">3.4.7 <CODE>BZ2_bzWriteClose</CODE></A>
|
||||
<BR>
|
||||
<A NAME="TOC33" HREF="manual_3.html#SEC33">3.4.8 Handling embedded compressed data streams</A>
|
||||
<BR>
|
||||
<A NAME="TOC34" HREF="manual_3.html#SEC34">3.4.9 Standard file-reading/writing code</A>
|
||||
<BR>
|
||||
</UL>
|
||||
<A NAME="TOC35" HREF="manual_3.html#SEC35">3.5 Utility functions</A>
|
||||
<BR>
|
||||
<UL>
|
||||
<A NAME="TOC36" HREF="manual_3.html#SEC36">3.5.1 <CODE>BZ2_bzBuffToBuffCompress</CODE></A>
|
||||
<BR>
|
||||
<A NAME="TOC37" HREF="manual_3.html#SEC37">3.5.2 <CODE>BZ2_bzBuffToBuffDecompress</CODE></A>
|
||||
<BR>
|
||||
</UL>
|
||||
<A NAME="TOC38" HREF="manual_3.html#SEC38">3.6 <CODE>zlib</CODE> compatibility functions</A>
|
||||
<BR>
|
||||
<A NAME="TOC39" HREF="manual_3.html#SEC39">3.7 Using the library in a <CODE>stdio</CODE>-free environment</A>
|
||||
<BR>
|
||||
<UL>
|
||||
<A NAME="TOC40" HREF="manual_3.html#SEC40">3.7.1 Getting rid of <CODE>stdio</CODE></A>
|
||||
<BR>
|
||||
<A NAME="TOC41" HREF="manual_3.html#SEC41">3.7.2 Critical error handling</A>
|
||||
<BR>
|
||||
</UL>
|
||||
<A NAME="TOC42" HREF="manual_3.html#SEC42">3.8 Making a Windows DLL</A>
|
||||
<BR>
|
||||
</UL>
|
||||
<A NAME="TOC43" HREF="manual_4.html#SEC43">4. Miscellanea</A>
|
||||
<BR>
|
||||
<UL>
|
||||
<A NAME="TOC44" HREF="manual_4.html#SEC44">4.1 Limitations of the compressed file format</A>
|
||||
<BR>
|
||||
<A NAME="TOC45" HREF="manual_4.html#SEC45">4.2 Portability issues</A>
|
||||
<BR>
|
||||
<A NAME="TOC46" HREF="manual_4.html#SEC46">4.3 Reporting bugs</A>
|
||||
<BR>
|
||||
<A NAME="TOC47" HREF="manual_4.html#SEC47">4.4 Did you get the right package?</A>
|
||||
<BR>
|
||||
<A NAME="TOC48" HREF="manual_4.html#SEC48">4.5 Testing</A>
|
||||
<BR>
|
||||
<A NAME="TOC49" HREF="manual_4.html#SEC49">4.6 Further reading</A>
|
||||
<BR>
|
||||
</UL>
|
||||
</UL>
|
||||
<HR SIZE=1>
|
||||
<BR>
|
||||
<FONT SIZE="-1">
|
||||
This document was generated
|
||||
by <I>Julian Seward</I> on <I>January, 5 2002</I>
|
||||
using <A HREF="http://www.mathematik.uni-kl.de/~obachman/Texi2html"><I>texi2html</I></A>
|
||||
|
||||
</BODY>
|
||||
</HTML>
|
16
mk251.c
16
mk251.c
|
@ -1,16 +0,0 @@
|
|||
|
||||
/* Spew out a long sequence of the byte 251. When fed to bzip2
|
||||
versions 1.0.0 or 1.0.1, causes it to die with internal error
|
||||
1007 in blocksort.c. This assertion misses an extremely rare
|
||||
case, which is fixed in this version (1.0.2) and above.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/* Write the byte value 251 to stdout 48,500,000 times, then exit with
   status 0.  The output is meant to be piped into bzip2 as a regression
   input (see the comment at the top of this file). */
int main ()
{
   /* The bound exceeds 32767, the largest value ISO C guarantees an
      int can hold; with a 16-bit int the comparison below would never
      become false.  long is guaranteed to reach at least 2147483647. */
   long i;

   for (i = 0; i < 48500000L; i++)
      putchar(251);

   return 0;
}
|
|
@ -63,7 +63,11 @@
|
|||
|
||||
|
||||
/*---------------------------------------------*/
|
||||
Int32 BZ2_rNums[512] = {
|
||||
#ifdef __ORCAC__
|
||||
Int16 BZ2_rNums[512] = {
|
||||
#else
|
||||
Int32 BZ2_rNums[512] = {
|
||||
#endif
|
||||
619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
|
||||
985, 724, 205, 454, 863, 491, 741, 242, 949, 214,
|
||||
733, 859, 335, 708, 621, 574, 73, 654, 730, 472,
|
||||
|
|
39
spewG.c
39
spewG.c
|
@ -1,39 +0,0 @@
|
|||
|
||||
/* spew out a thoroughly gigantic file designed so that bzip2
|
||||
can compress it reasonably rapidly. This is to help test
|
||||
support for large files (> 2GB) in a reasonable amount of time.
|
||||
I suggest you use the undocumented --exponential option to
|
||||
bzip2 when compressing the resulting file; this saves a bit of
|
||||
time. Note: *don't* bother with --exponential when compressing
|
||||
Real Files; it'll just waste a lot of CPU time :-)
|
||||
(but is otherwise harmless).
|
||||
*/
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* The number of megabytes of junk to spew out (roughly) */
|
||||
#define MEGABYTES 5000
|
||||
|
||||
#define N_BUF 1000000
|
||||
char buf[N_BUF];
|
||||
|
||||
int main ( int argc, char** argv )
|
||||
{
|
||||
int ii, kk, p;
|
||||
srandom(1);
|
||||
setbuffer ( stdout, buf, N_BUF );
|
||||
for (kk = 0; kk < MEGABYTES * 515; kk+=3) {
|
||||
p = 25+random()%50;
|
||||
for (ii = 0; ii < p; ii++)
|
||||
printf ( "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" );
|
||||
for (ii = 0; ii < p-1; ii++)
|
||||
printf ( "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" );
|
||||
for (ii = 0; ii < p+1; ii++)
|
||||
printf ( "ccccccccccccccccccccccccccccccccccccc" );
|
||||
}
|
||||
fflush(stdout);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
/* Case-insensitive version of strstr() obtained from http://snippets.org */
|
||||
|
||||
#ifdef __ORCAC__
|
||||
segment "bzip2";
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Designation: stristr
|
||||
**
|
||||
** Call syntax: char *stristr(char *String, char *Pattern)
|
||||
**
|
||||
** Description: This function is an ANSI version of strstr() with
|
||||
** case insensitivity. (Functionally equivalent to
|
||||
** the strcasestr function in some C libraries.)
|
||||
**
|
||||
** Return item: char *pointer if Pattern is found in String, else
|
||||
** null pointer
|
||||
**
|
||||
** Rev History: 07/06/03 Stephen Heumann Used in bunzip2 for GNO
|
||||
** 16/04/03 ? ?
|
||||
** 16/07/97 Greg Thayer Optimized
|
||||
** 07/04/95 Bob Stout ANSI-fy
|
||||
** 02/03/94 Fred Cole Original
|
||||
**
|
||||
** Hereby donated to public domain.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
/*
** stristr: find the first occurrence of Pattern in String, comparing
** characters without regard to case (via toupper()).
**
** String   NUL-terminated text to search.
** Pattern  NUL-terminated text to look for.
**
** Returns a pointer into String at the start of the first match, or
** NULL if there is none.  An empty Pattern matches at the start of
** String, as strstr() does.
*/
char *stristr(const char *String, const char *Pattern)
{
      const char *pptr, *sptr, *start;

      /* Without this, the match loop below would step past the
         terminators of both strings. */
      if ('\0' == *Pattern)
            return (char *)String;

      for (start = String; *start != '\0'; start++)
      {
            /* Skip positions whose first character cannot begin a match.
               The casts keep toupper() defined for characters that are
               negative when plain char is signed. */
            if (toupper((unsigned char)*start) !=
                toupper((unsigned char)*Pattern))
                  continue;

            pptr = Pattern;
            sptr = start;

            /* If String ends first, its NUL differs from the remaining
               (non-NUL) pattern character and the loop stops there. */
            while (toupper((unsigned char)*sptr) ==
                   toupper((unsigned char)*pptr))
            {
                  sptr++;
                  pptr++;

                  /* if end of pattern then pattern was found */
                  if ('\0' == *pptr)
                        return (char *)start;
            }
      }

      /* start never advances past String's terminator, so reaching
         here means no match exists. */
      return NULL;
}
|
126
unzcrash.c
126
unzcrash.c
|
@ -1,126 +0,0 @@
|
|||
|
||||
/* A test program written to test robustness to decompression of
|
||||
corrupted data. Usage is
|
||||
unzcrash filename
|
||||
and the program will read the specified file, compress it (in memory),
|
||||
and then repeatedly decompress it, each time with a different bit of
|
||||
the compressed data inverted, so as to test all possible one-bit errors.
|
||||
This should not cause any invalid memory accesses. If it does,
|
||||
I want to know about it!
|
||||
|
||||
p.s. As you can see from the above description, the process is
|
||||
incredibly slow. A file of size eg 5KB will cause it to run for
|
||||
many hours.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include "bzlib.h"
|
||||
|
||||
#define M_BLOCK 1000000
|
||||
|
||||
typedef unsigned char uchar;
|
||||
|
||||
#define M_BLOCK_OUT (M_BLOCK + 1000000)
|
||||
uchar inbuf[M_BLOCK];
|
||||
uchar outbuf[M_BLOCK_OUT];
|
||||
uchar zbuf[M_BLOCK + 600 + (M_BLOCK / 100)];
|
||||
|
||||
int nIn, nOut, nZ;
|
||||
|
||||
static char *bzerrorstrings[] = {
|
||||
"OK"
|
||||
,"SEQUENCE_ERROR"
|
||||
,"PARAM_ERROR"
|
||||
,"MEM_ERROR"
|
||||
,"DATA_ERROR"
|
||||
,"DATA_ERROR_MAGIC"
|
||||
,"IO_ERROR"
|
||||
,"UNEXPECTED_EOF"
|
||||
,"OUTBUFF_FULL"
|
||||
,"???" /* for future */
|
||||
,"???" /* for future */
|
||||
,"???" /* for future */
|
||||
,"???" /* for future */
|
||||
,"???" /* for future */
|
||||
,"???" /* for future */
|
||||
};
|
||||
|
||||
void flip_bit ( int bit )
|
||||
{
|
||||
int byteno = bit / 8;
|
||||
int bitno = bit % 8;
|
||||
uchar mask = 1 << bitno;
|
||||
//fprintf ( stderr, "(byte %d bit %d mask %d)",
|
||||
// byteno, bitno, (int)mask );
|
||||
zbuf[byteno] ^= mask;
|
||||
}
|
||||
|
||||
int main ( int argc, char** argv )
|
||||
{
|
||||
FILE* f;
|
||||
int r;
|
||||
int bit;
|
||||
int i;
|
||||
|
||||
if (argc != 2) {
|
||||
fprintf ( stderr, "usage: unzcrash filename\n" );
|
||||
return 1;
|
||||
}
|
||||
|
||||
f = fopen ( argv[1], "r" );
|
||||
if (!f) {
|
||||
fprintf ( stderr, "unzcrash: can't open %s\n", argv[1] );
|
||||
return 1;
|
||||
}
|
||||
|
||||
nIn = fread ( inbuf, 1, M_BLOCK, f );
|
||||
fprintf ( stderr, "%d bytes read\n", nIn );
|
||||
|
||||
nZ = M_BLOCK;
|
||||
r = BZ2_bzBuffToBuffCompress (
|
||||
zbuf, &nZ, inbuf, nIn, 9, 0, 30 );
|
||||
|
||||
assert (r == BZ_OK);
|
||||
fprintf ( stderr, "%d after compression\n", nZ );
|
||||
|
||||
for (bit = 0; bit < nZ*8; bit++) {
|
||||
fprintf ( stderr, "bit %d ", bit );
|
||||
flip_bit ( bit );
|
||||
nOut = M_BLOCK_OUT;
|
||||
r = BZ2_bzBuffToBuffDecompress (
|
||||
outbuf, &nOut, zbuf, nZ, 0, 0 );
|
||||
fprintf ( stderr, " %d %s ", r, bzerrorstrings[-r] );
|
||||
|
||||
if (r != BZ_OK) {
|
||||
fprintf ( stderr, "\n" );
|
||||
} else {
|
||||
if (nOut != nIn) {
|
||||
fprintf(stderr, "nIn/nOut mismatch %d %d\n", nIn, nOut );
|
||||
return 1;
|
||||
} else {
|
||||
for (i = 0; i < nOut; i++)
|
||||
if (inbuf[i] != outbuf[i]) {
|
||||
fprintf(stderr, "mismatch at %d\n", i );
|
||||
return 1;
|
||||
}
|
||||
if (i == nOut) fprintf(stderr, "really ok!\n" );
|
||||
}
|
||||
}
|
||||
|
||||
flip_bit ( bit );
|
||||
}
|
||||
|
||||
#if 0
|
||||
assert (nOut == nIn);
|
||||
for (i = 0; i < nOut; i++) {
|
||||
if (inbuf[i] != outbuf[i]) {
|
||||
fprintf ( stderr, "difference at %d !\n", i );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
fprintf ( stderr, "all ok\n" );
|
||||
return 0;
|
||||
}
|
5
words0
5
words0
|
@ -1,5 +0,0 @@
|
|||
|
||||
If compilation produces errors, or a large number of warnings,
|
||||
please read README.COMPILATION.PROBLEMS -- you might be able to
|
||||
adjust the flags in this Makefile to improve matters.
|
||||
|
4
words1
4
words1
|
@ -1,4 +1,4 @@
|
|||
|
||||
Doing 6 tests (3 compress, 3 uncompress) ...
|
||||
Doing 3 decompression tests ...
|
||||
If there's a problem, things might stop at this point.
|
||||
|
||||
|
||||
|
|
6
words2
6
words2
|
@ -1,5 +1,3 @@
|
|||
|
||||
Checking test results. If any of the four "cmp"s which follow
|
||||
report any differences, something is wrong. If you can't easily
|
||||
figure out what, please let me know (jseward@acm.org).
|
||||
|
||||
Checking test results. If any of the three "cmp"s which
|
||||
follow report any differences, something is wrong.
|
||||
|
|
33
words3
33
words3
|
@ -1,23 +1,16 @@
|
|||
|
||||
If you got this far and the "cmp"s didn't complain, it looks
|
||||
like you're in business.
|
||||
like you're in business.
|
||||
|
||||
To install in /usr/bin, /usr/lib, /usr/man and /usr/include, type
|
||||
make install
|
||||
To install somewhere else, eg, /xxx/yyy/{bin,lib,man,include}, type
|
||||
make install PREFIX=/xxx/yyy
|
||||
If you are (justifiably) paranoid and want to see what 'make install'
|
||||
is going to do, you can first do
|
||||
make -n install or
|
||||
make -n install PREFIX=/xxx/yyy respectively.
|
||||
The -n instructs make to show the commands it would execute, but
|
||||
not actually execute them.
|
||||
|
||||
Instructions for use are in the preformatted manual page, in the file
|
||||
bzip2.txt. For more detailed documentation, read the full manual.
|
||||
It is available in Postscript form (manual.ps), PDF form (manual.pdf),
|
||||
and HTML form (manual_toc.html).
|
||||
|
||||
You can also do "bzip2 --help" to see some helpful information.
|
||||
"bzip2 -L" displays the software license.
|
||||
To install in /usr/local/bin and /usr/local/man, type
|
||||
dmake justinstall
|
||||
To install somewhere else, eg, /xxx/yyy/{bin,man}, type
|
||||
dmake justinstall PREFIX=/xxx/yyy
|
||||
If you are (justifiably) paranoid and want to see what
|
||||
'dmake justinstall' is going to do, type
|
||||
dmake -n justinstall or
|
||||
dmake -n justinstall PREFIX=/xxx/yyy respectively.
|
||||
The -n instructs make to show the commands it would
|
||||
execute, but not actually execute them.
|
||||
|
||||
You can do "bunzip2 --help" to see some helpful information.
|
||||
"bunzip2 -L" displays the software license.
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
|
||||
If all went well, bunzip2 (and bzip2recover) should
|
||||
now be installed under your /usr/local hierarchy.
|
||||
|
||||
Put the following line in your gshrc file so you can use bzcat:
|
||||
alias bzcat "bunzip2 -c"
|
||||
|
||||
Instructions for use are in the man page for bunzip2. Type
|
||||
man bunzip2
|
||||
to read it.
|
Loading…
Reference in New Issue