diff --git a/Makefile b/Makefile index 8305235..a3d8779 100644 --- a/Makefile +++ b/Makefile @@ -1,192 +1,94 @@ +# Makefile for bunzip2 for GNO (for use with dmake) +# Based on Unix Makefile for bzip2 +# Modified for GNO by Stephen Heumann -SHELL=/bin/sh +# ORCA/C 2.1.0 may need more than 8 megabytes of RAM to compile decompress.c +# with full optimization enabled. Thus, this makefile can only +# be used as is on an emulated system with 14 megabyte RAM support. # To assist in cross-compiling -CC=gcc -AR=ar -RANLIB=ranlib +# Uncomment this if make doesn't have the $CC variable set appropriately +# CC=occ +RM=cp -p rm + LDFLAGS= -# Suitably paranoid flags to avoid bugs in gcc-2.7 -BIGFILES=-D_FILE_OFFSET_BITS=64 -CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES) +# The "-I /usr/include" shouldn't be needed but seemed to fix problems for me +CFLAGS=-a0 -w -O -I /usr/include + +NOROOTFLAG=-r # Where you want it installed when you do 'make install' -PREFIX=/usr +PREFIX=/usr/local -OBJS= blocksort.o \ +OBJS= stristr.o \ huffman.o \ crctable.o \ randtable.o \ - compress.o \ decompress.o \ bzlib.o -all: libbz2.a bzip2 bzip2recover test +all: bunzip2 bzip2recover test -bzip2: libbz2.a bzip2.o - $(CC) $(CFLAGS) $(LDFLAGS) -o bzip2 bzip2.o -L. 
-lbz2 +bunzip2: bzip2.o $(OBJS) + $(CC) -o bunzip2 bunzip2.rez + $(CC) $(CFLAGS) $(LDFLAGS) bzip2.o $(OBJS) -o bunzip2 bzip2recover: bzip2recover.o - $(CC) $(CFLAGS) $(LDFLAGS) -o bzip2recover bzip2recover.o - -libbz2.a: $(OBJS) - rm -f libbz2.a - $(AR) cq libbz2.a $(OBJS) - @if ( test -f $(RANLIB) -o -f /usr/bin/ranlib -o \ - -f /bin/ranlib -o -f /usr/ccs/bin/ranlib ) ; then \ - echo $(RANLIB) libbz2.a ; \ - $(RANLIB) libbz2.a ; \ - fi + $(CC) -o bzip2recover bzip2recover.rez + $(CC) $(CFLAGS) $(LDFLAGS) bzip2recover.o -o bzip2recover check: test -test: bzip2 +test: bunzip2 @cat words1 - ./bzip2 -1 < sample1.ref > sample1.rb2 - ./bzip2 -2 < sample2.ref > sample2.rb2 - ./bzip2 -3 < sample3.ref > sample3.rb2 - ./bzip2 -d < sample1.bz2 > sample1.tst - ./bzip2 -d < sample2.bz2 > sample2.tst - ./bzip2 -ds < sample3.bz2 > sample3.tst - cmp sample1.bz2 sample1.rb2 - cmp sample2.bz2 sample2.rb2 - cmp sample3.bz2 sample3.rb2 + ./bunzip2 -dk < sample1.bz2 > sample1.tst + ./bunzip2 -dk < sample2.bz2 > sample2.tst + ./bunzip2 -dks < sample3.bz2 > sample3.tst + @cat words2 cmp sample1.tst sample1.ref cmp sample2.tst sample2.ref cmp sample3.tst sample3.ref @cat words3 -install: bzip2 bzip2recover - if ( test ! -d $(PREFIX)/bin ) ; then mkdir -p $(PREFIX)/bin ; fi - if ( test ! -d $(PREFIX)/lib ) ; then mkdir -p $(PREFIX)/lib ; fi - if ( test ! -d $(PREFIX)/man ) ; then mkdir -p $(PREFIX)/man ; fi - if ( test ! -d $(PREFIX)/man/man1 ) ; then mkdir -p $(PREFIX)/man/man1 ; fi - if ( test ! 
-d $(PREFIX)/include ) ; then mkdir -p $(PREFIX)/include ; fi - cp -f bzip2 $(PREFIX)/bin/bzip2 - cp -f bzip2 $(PREFIX)/bin/bunzip2 - cp -f bzip2 $(PREFIX)/bin/bzcat +install: bunzip2 bzip2recover test justinstall + +justinstall: +# This should install bunzip2 for GNO under /usr/local + mkdir $(PREFIX)/bin >& .null + mkdir $(PREFIX)/man >& .null + mkdir $(PREFIX)/man/man1 >& .null + cp -f bunzip2 $(PREFIX)/bin/bunzip2 cp -f bzip2recover $(PREFIX)/bin/bzip2recover - chmod a+x $(PREFIX)/bin/bzip2 - chmod a+x $(PREFIX)/bin/bunzip2 - chmod a+x $(PREFIX)/bin/bzcat - chmod a+x $(PREFIX)/bin/bzip2recover - cp -f bzip2.1 $(PREFIX)/man/man1 - chmod a+r $(PREFIX)/man/man1/bzip2.1 - cp -f bzlib.h $(PREFIX)/include - chmod a+r $(PREFIX)/include/bzlib.h - cp -f libbz2.a $(PREFIX)/lib - chmod a+r $(PREFIX)/lib/libbz2.a - cp -f bzgrep $(PREFIX)/bin/bzgrep - ln $(PREFIX)/bin/bzgrep $(PREFIX)/bin/bzegrep - ln $(PREFIX)/bin/bzgrep $(PREFIX)/bin/bzfgrep - chmod a+x $(PREFIX)/bin/bzgrep - cp -f bzmore $(PREFIX)/bin/bzmore - ln $(PREFIX)/bin/bzmore $(PREFIX)/bin/bzless - chmod a+x $(PREFIX)/bin/bzmore - cp -f bzdiff $(PREFIX)/bin/bzdiff - ln $(PREFIX)/bin/bzdiff $(PREFIX)/bin/bzcmp - chmod a+x $(PREFIX)/bin/bzdiff - cp -f bzgrep.1 bzmore.1 bzdiff.1 $(PREFIX)/man/man1 - chmod a+r $(PREFIX)/man/man1/bzgrep.1 - chmod a+r $(PREFIX)/man/man1/bzmore.1 - chmod a+r $(PREFIX)/man/man1/bzdiff.1 - echo ".so man1/bzgrep.1" > $(PREFIX)/man/man1/bzegrep.1 - echo ".so man1/bzgrep.1" > $(PREFIX)/man/man1/bzfgrep.1 - echo ".so man1/bzmore.1" > $(PREFIX)/man/man1/bzless.1 - echo ".so man1/bzdiff.1" > $(PREFIX)/man/man1/bzcmp.1 + cp -f bunzip2.1 $(PREFIX)/man/man1/bunzip2.1 + cp -f bzip2recover.1 $(PREFIX)/man/man1/bzip2recover.1 + cp -f bzcat.1 $(PREFIX)/man/man1/bzcat.1 + @cat words4 distclean: clean -clean: - rm -f *.o libbz2.a bzip2 bzip2recover \ - sample1.rb2 sample2.rb2 sample3.rb2 \ +clean: + $(RM) -f *.o *.a *.sym *.root bunzip2 bzip2recover \ sample1.tst sample2.tst sample3.tst -blocksort.o: 
blocksort.c - @cat words0 - $(CC) $(CFLAGS) -c blocksort.c -huffman.o: huffman.c - $(CC) $(CFLAGS) -c huffman.c -crctable.o: crctable.c - $(CC) $(CFLAGS) -c crctable.c -randtable.o: randtable.c - $(CC) $(CFLAGS) -c randtable.c -compress.o: compress.c - $(CC) $(CFLAGS) -c compress.c -decompress.o: decompress.c - $(CC) $(CFLAGS) -c decompress.c -bzlib.o: bzlib.c - $(CC) $(CFLAGS) -c bzlib.c -bzip2.o: bzip2.c - $(CC) $(CFLAGS) -c bzip2.c +stristr.o: stristr.c + $(CC) $(CFLAGS) $(NOROOTFLAG) -c stristr.c +huffman.o: huffman.c bzlib_private.h + $(CC) $(CFLAGS) $(NOROOTFLAG) -c huffman.c +crctable.o: crctable.c bzlib_private.h + $(CC) $(CFLAGS) $(NOROOTFLAG) -c crctable.c +randtable.o: randtable.c bzlib_private.h + $(CC) $(CFLAGS) $(NOROOTFLAG) -c randtable.c +decompress.o: decompress.c bzlib_private.h + $(CC) $(CFLAGS) $(NOROOTFLAG) -c decompress.c +bzlib.o: bzlib.c bzlib_private.h + $(CC) $(CFLAGS) $(NOROOTFLAG) -c bzlib.c +bzip2.o: bzip2.c bzlib.h + $(CC) $(CFLAGS) -s 2048 -C1 -c bzip2.c +# $(CC) $(CFLAGS) -C1 -D __STACK_CHECK__ -c bzip2.c bzip2recover.o: bzip2recover.c - $(CC) $(CFLAGS) -c bzip2recover.c + $(CC) $(CFLAGS) -s 1024 -c bzip2recover.c +# $(CC) $(CFLAGS) -D __STACK_CHECK__ -c bzip2recover.c +bzlib_private.h: bzlib.h -DISTNAME=bzip2-1.0.2 -tarfile: - rm -f $(DISTNAME) - ln -sf . 
$(DISTNAME) - tar cvf $(DISTNAME).tar \ - $(DISTNAME)/blocksort.c \ - $(DISTNAME)/huffman.c \ - $(DISTNAME)/crctable.c \ - $(DISTNAME)/randtable.c \ - $(DISTNAME)/compress.c \ - $(DISTNAME)/decompress.c \ - $(DISTNAME)/bzlib.c \ - $(DISTNAME)/bzip2.c \ - $(DISTNAME)/bzip2recover.c \ - $(DISTNAME)/bzlib.h \ - $(DISTNAME)/bzlib_private.h \ - $(DISTNAME)/Makefile \ - $(DISTNAME)/manual.texi \ - $(DISTNAME)/manual.ps \ - $(DISTNAME)/manual.pdf \ - $(DISTNAME)/LICENSE \ - $(DISTNAME)/bzip2.1 \ - $(DISTNAME)/bzip2.1.preformatted \ - $(DISTNAME)/bzip2.txt \ - $(DISTNAME)/words0 \ - $(DISTNAME)/words1 \ - $(DISTNAME)/words2 \ - $(DISTNAME)/words3 \ - $(DISTNAME)/sample1.ref \ - $(DISTNAME)/sample2.ref \ - $(DISTNAME)/sample3.ref \ - $(DISTNAME)/sample1.bz2 \ - $(DISTNAME)/sample2.bz2 \ - $(DISTNAME)/sample3.bz2 \ - $(DISTNAME)/dlltest.c \ - $(DISTNAME)/*.html \ - $(DISTNAME)/README \ - $(DISTNAME)/README.COMPILATION.PROBLEMS \ - $(DISTNAME)/CHANGES \ - $(DISTNAME)/libbz2.def \ - $(DISTNAME)/libbz2.dsp \ - $(DISTNAME)/dlltest.dsp \ - $(DISTNAME)/makefile.msc \ - $(DISTNAME)/Y2K_INFO \ - $(DISTNAME)/unzcrash.c \ - $(DISTNAME)/spewG.c \ - $(DISTNAME)/mk251.c \ - $(DISTNAME)/bzdiff \ - $(DISTNAME)/bzdiff.1 \ - $(DISTNAME)/bzmore \ - $(DISTNAME)/bzmore.1 \ - $(DISTNAME)/bzgrep \ - $(DISTNAME)/bzgrep.1 \ - $(DISTNAME)/Makefile-libbz2_so - gzip -v $(DISTNAME).tar - -# For rebuilding the manual from sources on my RedHat 7.2 box -manual: manual.ps manual.pdf manual.html - -manual.ps: manual.texi - tex manual.texi - dvips -o manual.ps manual.dvi - -manual.pdf: manual.ps - ps2pdf manual.ps - -manual.html: manual.texi - texi2html -split_chapter manual.texi +chtyp: + chtyp -l cc *.c *.h diff --git a/Makefile-libbz2_so b/Makefile-libbz2_so deleted file mode 100644 index 4986fe2..0000000 --- a/Makefile-libbz2_so +++ /dev/null @@ -1,44 +0,0 @@ - -# This Makefile builds a shared version of the library, -# libbz2.so.1.0.2, with soname libbz2.so.1.0, -# at least on x86-Linux (RedHat 7.2), -# 
with gcc-2.96 20000731 (Red Hat Linux 7.1 2.96-98). -# Please see the README file for some -# important info about building the library like this. - -SHELL=/bin/sh -CC=gcc -BIGFILES=-D_FILE_OFFSET_BITS=64 -CFLAGS=-fpic -fPIC -Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES) - -OBJS= blocksort.o \ - huffman.o \ - crctable.o \ - randtable.o \ - compress.o \ - decompress.o \ - bzlib.o - -all: $(OBJS) - $(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.2 $(OBJS) - $(CC) $(CFLAGS) -o bzip2-shared bzip2.c libbz2.so.1.0.2 - rm -f libbz2.so.1.0 - ln -s libbz2.so.1.0.2 libbz2.so.1.0 - -clean: - rm -f $(OBJS) bzip2.o libbz2.so.1.0.2 libbz2.so.1.0 bzip2-shared - -blocksort.o: blocksort.c - $(CC) $(CFLAGS) -c blocksort.c -huffman.o: huffman.c - $(CC) $(CFLAGS) -c huffman.c -crctable.o: crctable.c - $(CC) $(CFLAGS) -c crctable.c -randtable.o: randtable.c - $(CC) $(CFLAGS) -c randtable.c -compress.o: compress.c - $(CC) $(CFLAGS) -c compress.c -decompress.o: decompress.c - $(CC) $(CFLAGS) -c decompress.c -bzlib.o: bzlib.c - $(CC) $(CFLAGS) -c bzlib.c diff --git a/README.COMPILATION.PROBLEMS b/README.COMPILATION.PROBLEMS deleted file mode 100644 index bd1822d..0000000 --- a/README.COMPILATION.PROBLEMS +++ /dev/null @@ -1,130 +0,0 @@ - -bzip2-1.0 should compile without problems on the vast majority of -platforms. Using the supplied Makefile, I've built and tested it -myself for x86-linux, sparc-solaris, alpha-linux, x86-cygwin32 and -alpha-tru64unix. With makefile.msc, Visual C++ 6.0 and nmake, you can -build a native Win32 version too. Large file support seems to work -correctly on at least alpha-tru64unix and x86-cygwin32 (on Windows -2000). - -When I say "large file" I mean a file of size 2,147,483,648 (2^31) -bytes or above. Many older OSs can't handle files above this size, -but many newer ones can. Large files are pretty huge -- most files -you'll encounter are not Large Files. 
- -Earlier versions of bzip2 (0.1, 0.9.0, 0.9.5) compiled on a wide -variety of platforms without difficulty, and I hope this version will -continue in that tradition. However, in order to support large files, -I've had to include the define -D_FILE_OFFSET_BITS=64 in the Makefile. -This can cause problems. - -The technique of adding -D_FILE_OFFSET_BITS=64 to get large file -support is, as far as I know, the Recommended Way to get correct large -file support. For more details, see the Large File Support -Specification, published by the Large File Summit, at - http://www.sas.com/standard/large.file/ - -As a general comment, if you get compilation errors which you think -are related to large file support, try removing the above define from -the Makefile, ie, delete the line - BIGFILES=-D_FILE_OFFSET_BITS=64 -from the Makefile, and do 'make clean ; make'. This will give you a -version of bzip2 without large file support, which, for most -applications, is probably not a problem. - -Alternatively, try some of the platform-specific hints listed below. - -You can use the spewG.c program to generate huge files to test bzip2's -large file support, if you are feeling paranoid. Be aware though that -any compilation problems which affect bzip2 will also affect spewG.c, -alas. - - -Known problems as of 1.0pre8: -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* HP/UX 10.20 and 11.00, using gcc (2.7.2.3 and 2.95.2): A large - number of warnings appear, including the following: - - /usr/include/sys/resource.h: In function `getrlimit': - /usr/include/sys/resource.h:168: - warning: implicit declaration of function `__getrlimit64' - /usr/include/sys/resource.h: In function `setrlimit': - /usr/include/sys/resource.h:170: - warning: implicit declaration of function `__setrlimit64' - - This would appear to be a problem with large file support, header - files and gcc. gcc may or may not give up at this point. 
If it - fails, you might be able to improve matters by adding - -D__STDC_EXT__=1 - to the BIGFILES variable in the Makefile (ie, change its definition - to - BIGFILES=-D_FILE_OFFSET_BITS=64 -D__STDC_EXT__=1 - - Even if gcc does produce a binary which appears to work (ie passes - its self-tests), you might want to test it to see if it works properly - on large files. - - -* HP/UX 10.20 and 11.00, using HP's cc compiler. - - No specific problems for this combination, except that you'll need to - specify the -Ae flag, and zap the gcc-specific stuff - -Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce. - You should retain -D_FILE_OFFSET_BITS=64 in order to get large - file support -- which is reported to work ok for this HP/UX + cc - combination. - - -* SunOS 4.1.X. - - Amazingly, there are still people out there using this venerable old - banger. I shouldn't be too rude -- I started life on SunOS, and - it was a pretty darn good OS, way back then. Anyway: - - SunOS doesn't seem to have strerror(), so you'll have to use - perror(), perhaps by doing adding this (warning: UNTESTED CODE): - - char* strerror ( int errnum ) - { - if (errnum < 0 || errnum >= sys_nerr) - return "Unknown error"; - else - return sys_errlist[errnum]; - } - - Or you could comment out the relevant calls to strerror; they're - not mission-critical. Or you could upgrade to Solaris. Ha ha ha! - (what?? you think I've got Bad Attitude?) - - -* Making a shared library on Solaris. (Not really a compilation - problem, but many people ask ...) - - Firstly, if you have Solaris 8, either you have libbz2.so already - on your system, or you can install it from the Solaris CD. - - Secondly, be aware that there are potential naming conflicts - between the .so file supplied with Solaris 8, and the .so file - which Makefile-libbz2_so will make. Makefile-libbz2_so creates - a .so which has the names which I intend to be "official" as - of version 1.0.0 and onwards. 
Unfortunately, the .so in - Solaris 8 appeared before I decided on the final names, so - the two libraries are incompatible. We have since communicated - and I hope that the problems will have been solved in the next - version of Solaris, whenever that might appear. - - All that said: you might be able to get somewhere - by finding the line in Makefile-libbz2_so which says - - $(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.2 $(OBJS) - - and replacing with - - $(CC) -G -shared -o libbz2.so.1.0.2 -h libbz2.so.1.0 $(OBJS) - - If gcc objects to the combination -fpic -fPIC, get rid of - the second one, leaving just "-fpic". - - -That's the end of the currently known compilation problems. diff --git a/README.GNO b/README.GNO new file mode 100644 index 0000000..6e8423e --- /dev/null +++ b/README.GNO @@ -0,0 +1,158 @@ +README FOR BUNZIP2 FOR GNO +========================== +This is a port of the bunzip2 archive decompression program to the GNO +environment on the Apple IIgs. It is based on Julian Seward's original +bzip2 program, but it includes only the decompression (and testing) +functionality; compression is disabled. This archive also includes the +bzip2recover program, which may allow you to recover some data from a +partially corrupted bzip2 archive file. These programs correspond to +Julian Seward's bzip2 version 1.0.2. + + +REQUIREMENTS +============ +Bunzip2 requires a ROM 01 or ROM 3 Apple IIgs (or an emulator thereof) +running IIgs System Software 6.0.1 and GNO 2.0.6 (or later). + +Bunzip2 also needs considerable memory. It will not be able to decompress +most archives if you have less than 4 megabytes of RAM. On 4-5 MB +systems, you will likely have to specify the -s option to minimize memory +usage; on an 8MB (or 14MB) system, this will probably not be necessary, +unless you have a very large number of system extensions or other programs +running under GNO. See the manpage for more details on memory usage.
+If bunzip2 gives you an out-of-memory error the first time you run it, try +again. The first attempt may have caused the system to reorganize memory +and purge unneeded data, freeing up enough space to run bunzip2 on the +second attempt. + +Bunzip2 will also benefit from an accelerator, although one is obviously +not required. Even with an accelerator, it can be rather slow when +decompressing larger archives. Be prepared to wait a very long time +(several hours or even longer) for bunzip2 to finish decompressing large +bzip2 archives. + + +INSTALLATION +============ +To install bunzip2, simply run "dmake justinstall". Alternatively, you can +install it manually: copy the bunzip2 and bzip2recover programs to your GNO +installation's /usr/local/bin directory, and copy the bunzip2.1, bzcat.1, +and bzip2recover.1 manpages to the /usr/local/man/man1 directory. + +After installing bunzip2, you should read the manpage for directions on how +to use it. You can put the following line in your gshrc file so you can use +'bzcat' as documented in the manpage: + +alias bzcat "bunzip2 -c" + + +NOTES ON THE SOURCE CODE +======================== +[If you just want to use bunzip2, you do not need to read this section.] + +Please note that a couple source files use non-ProDOS compatible filenames. +If you do not have an HFS or AppleShare partition available, these can +easily be changed to fit ProDOS conventions. + +I had to make several changes to the bzip2 program when porting it to GNO. +The code is not very good-looking, but it does compile without warnings. + +First, I disabled the compression functionality and set up the program to +decompress by default (and I renamed the binary to 'bunzip2' to reflect +this). The compression functionality is not very important on the GS, since +bzip2 is not a very good choice for compressing GS-specific data; ShrinkIt +will be much faster and preserves GS-specific file attributes. 
Even if you +want to create archives for use on UNIX-like systems, compress or gzip is +a better choice, and both are already available under GNO. For these +reasons, and because it reduced the amount of code that I had to modify, I +removed the compression functionality from bunzip2. + +Other major changes to the code fell into several categories: +(1) Type sizes: Most of the code used defines for types such as Int32, making + it easy to adapt to the GS's 16-bit ints. The interface between the + bzip2 program and code designed to be compiled as 'libbzip2,' however, + assumes that int is 32 bits, so I had to modify it to use the appropriate + integer types on the GS. There were also silent assumptions in some + other areas that native ints are 32 bits, and I had to identify and + correct these. There were also variables specified as 'Int32' even + though 16 bits were sufficient to represent their possible range of + values; when I noticed these variables, I changed them appropriately. + +(2) ORCA/C compiler limitations: ORCA/C in its 'small mode' (the only one + supported by the GNO libraries) places a 64k restriction on the size + of data structures that can be addressed as arrays. This is a problem + with bunzip2, which allocates and uses multi-megabyte data structures. + To work around this, I changed array-style references to these data + structures to use pointer arithmetic instead, working around the + limitation (eg. I changed references to 'a[b]' to '*(a+b)'. ). I also + changed large local variables to be static or dynamically allocated + in order to avoid excessive stack usage. + +(3) ORCA/C compiler bugs: In several cases ORCA/C 2.1.0 generated bad code + at the maximum optimization level. Most instances where reduced + optimization levels are used are necessary to work around bugs encountered + when using the disabled optimizations. Also, the size of the main + decompression function in decompress.c stresses ORCA/C.
I modified + the GET_BITS macro to reduce the code size of the BZ2_decompress function + by making some of the code into a separate function. If this is not done + or if optimization is not enabled (increasing the compiled code size + as compared to when optimization is enabled), the compiler will crash, + give an error, or generate bad object code that gives linker errors. + +(4) Modifications to work well with GNO and GS/OS: These include setting the + output filetype and disabling newline translation in GNO's stdio + implementation. I also set the stack sizes of the programs to + appropriate values and enabled stack checking for the small recursive + segment of the program (although it shouldn't actually pose any problem). + Additionally, I changed filename operations to be case-insensitive, + reflecting the case-insensitive nature of filesystems in the Apple IIgs. + +I made most modifications conditional on the __appleiigs__, __ORCAC__, or +__GNO__ macros. Which macro I used gives some hint at the reason for each +modification, although all or none should be used to produce a working +executable (changes conditionalized on one macro may depend on those +conditionalized on another). + + +COMPILING +========= +The included Makefile can be used with dmake, occ, and ORCA/C 2.1.0, all of +which should be installed in your GNO 2.0.6 installation. You will also need +a copy of the lsaneglue library (which is missing from the default GNO 2.0.6 +installation) to be present in your GNO /lib directory. Run 'dmake bunzip2' +to build the main program or 'dmake test' to build both programs and run a +simple test to ensure that bunzip2 is working correctly. + +There are some special considerations necessary when compiling the file +decompress.c. As noted above, it must be compiled with (nearly) full +optimization to compile properly. To compile it with full optimization using +ORCA/C 2.1.0, however, requires more than 8MB of memory.
Thus, decompress.c +(and by extension the bunzip2 program as a whole) can only be compiled on an +emulator with 14MB memory support enabled. The only emulators that presently +support this are Bernie ][ The Rescue and Sweet16. I have included a +prebuilt object file (decompress.o) so that you can rebuild bunzip2 with +changes to other source files using a real IIgs. + + +AREAS FOR IMPROVEMENT +===================== +* Resource forks and GS/OS filetypes are not supported. This is not a major + problem; other programs such as ShrinkIt should be used for GS-specific + archives. +* Compression could be reenabled. This would require adapting the compression + and block sorting routines to work properly under GNO on the GS. +* Some or all of the program could be rewritten in assembly language. This + would improve its performance by some amount, although I don't know how + much. It also might reduce memory usage. This would require a full + understanding of the BWT compression and decompression algorithms used in + bzip2, which I do not presently possess. + + +SUPPORT +======= +I can be contacted by email at sheumann@myrealbox.com . Please contact me, +rather than Julian Seward, about any problems that you are experiencing only +in the GNO version of bunzip2. + +-- +Stephen Heumann diff --git a/README b/README.bzip2 similarity index 100% rename from README rename to README.bzip2 diff --git a/Y2K_INFO b/Y2K_INFO deleted file mode 100644 index 55fd56a..0000000 --- a/Y2K_INFO +++ /dev/null @@ -1,34 +0,0 @@ - -Y2K status of bzip2 and libbzip2, versions 0.1, 0.9.0 and 0.9.5 -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Informally speaking: - bzip2 is a compression program built on top of libbzip2, - a library which does the real work of compression and - decompression. As far as I am aware, libbzip2 does not have - any date-related code at all.
- - bzip2 itself copies dates from source to destination files - when compressing or decompressing, using the 'stat' and 'utime' - UNIX system calls. It doesn't examine, manipulate or store the - dates in any way. So as far as I can see, there shouldn't be any - problem with bzip2 providing 'stat' and 'utime' work correctly - on your system. - - On non-unix platforms (those for which BZ_UNIX in bzip2.c is - not set to 1), bzip2 doesn't even do the date copying. - - Overall, informally speaking, I don't think bzip2 or libbzip2 - have a Y2K problem. - -Formally speaking: - I am not prepared to offer you any assurance whatsoever - regarding Y2K issues in my software. You alone assume the - entire risk of using the software. The disclaimer of liability - in the LICENSE file in the bzip2 source distribution continues - to apply on this issue as with every other issue pertaining - to the software. - -Julian Seward -Cambridge, UK -25 August 1999 diff --git a/blocksort.c b/blocksort.c deleted file mode 100644 index aba3efc..0000000 --- a/blocksort.c +++ /dev/null @@ -1,1141 +0,0 @@ - -/*-------------------------------------------------------------*/ -/*--- Block sorting machinery ---*/ -/*--- blocksort.c ---*/ -/*-------------------------------------------------------------*/ - -/*-- - This file is a part of bzip2 and/or libbzip2, a program and - library for lossless, block-sorting data compression. - - Copyright (C) 1996-2002 Julian R Seward. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. 
If you use this - software in a product, an acknowledgment in the product - documentation would be appreciated but is not required. - - 3. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 4. The name of the author may not be used to endorse or promote - products derived from this software without specific prior written - permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS - OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE - GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - Julian Seward, Cambridge, UK. - jseward@acm.org - bzip2/libbzip2 version 1.0 of 21 March 2000 - - This program is based on (at least) the work of: - Mike Burrows - David Wheeler - Peter Fenwick - Alistair Moffat - Radford Neal - Ian H. Witten - Robert Sedgewick - Jon L. Bentley - - For more information on these sources, see the manual. - - To get some idea how the block sorting algorithms in this file - work, read my paper - On the Performance of BWT Sorting Algorithms - in Proceedings of the IEEE Data Compression Conference 2000, - Snowbird, Utah, USA, 27-30 March 2000. The main sort in this - file implements the algorithm called cache in the paper. 
---*/ - - -#include "bzlib_private.h" - -/*---------------------------------------------*/ -/*--- Fallback O(N log(N)^2) sorting ---*/ -/*--- algorithm, for repetitive blocks ---*/ -/*---------------------------------------------*/ - -/*---------------------------------------------*/ -static -__inline__ -void fallbackSimpleSort ( UInt32* fmap, - UInt32* eclass, - Int32 lo, - Int32 hi ) -{ - Int32 i, j, tmp; - UInt32 ec_tmp; - - if (lo == hi) return; - - if (hi - lo > 3) { - for ( i = hi-4; i >= lo; i-- ) { - tmp = fmap[i]; - ec_tmp = eclass[tmp]; - for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) - fmap[j-4] = fmap[j]; - fmap[j-4] = tmp; - } - } - - for ( i = hi-1; i >= lo; i-- ) { - tmp = fmap[i]; - ec_tmp = eclass[tmp]; - for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) - fmap[j-1] = fmap[j]; - fmap[j-1] = tmp; - } -} - - -/*---------------------------------------------*/ -#define fswap(zz1, zz2) \ - { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } - -#define fvswap(zzp1, zzp2, zzn) \ -{ \ - Int32 yyp1 = (zzp1); \ - Int32 yyp2 = (zzp2); \ - Int32 yyn = (zzn); \ - while (yyn > 0) { \ - fswap(fmap[yyp1], fmap[yyp2]); \ - yyp1++; yyp2++; yyn--; \ - } \ -} - - -#define fmin(a,b) ((a) < (b)) ? 
(a) : (b) - -#define fpush(lz,hz) { stackLo[sp] = lz; \ - stackHi[sp] = hz; \ - sp++; } - -#define fpop(lz,hz) { sp--; \ - lz = stackLo[sp]; \ - hz = stackHi[sp]; } - -#define FALLBACK_QSORT_SMALL_THRESH 10 -#define FALLBACK_QSORT_STACK_SIZE 100 - - -static -void fallbackQSort3 ( UInt32* fmap, - UInt32* eclass, - Int32 loSt, - Int32 hiSt ) -{ - Int32 unLo, unHi, ltLo, gtHi, n, m; - Int32 sp, lo, hi; - UInt32 med, r, r3; - Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; - Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; - - r = 0; - - sp = 0; - fpush ( loSt, hiSt ); - - while (sp > 0) { - - AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 ); - - fpop ( lo, hi ); - if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { - fallbackSimpleSort ( fmap, eclass, lo, hi ); - continue; - } - - /* Random partitioning. Median of 3 sometimes fails to - avoid bad cases. Median of 9 seems to help but - looks rather expensive. This too seems to work but - is cheaper. Guidance for the magic constants - 7621 and 32768 is taken from Sedgewick's algorithms - book, chapter 35. 
- */ - r = ((r * 7621) + 1) % 32768; - r3 = r % 3; - if (r3 == 0) med = eclass[fmap[lo]]; else - if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else - med = eclass[fmap[hi]]; - - unLo = ltLo = lo; - unHi = gtHi = hi; - - while (1) { - while (1) { - if (unLo > unHi) break; - n = (Int32)eclass[fmap[unLo]] - (Int32)med; - if (n == 0) { - fswap(fmap[unLo], fmap[ltLo]); - ltLo++; unLo++; - continue; - }; - if (n > 0) break; - unLo++; - } - while (1) { - if (unLo > unHi) break; - n = (Int32)eclass[fmap[unHi]] - (Int32)med; - if (n == 0) { - fswap(fmap[unHi], fmap[gtHi]); - gtHi--; unHi--; - continue; - }; - if (n < 0) break; - unHi--; - } - if (unLo > unHi) break; - fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; - } - - AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); - - if (gtHi < ltLo) continue; - - n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); - m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); - - n = lo + unLo - ltLo - 1; - m = hi - (gtHi - unHi) + 1; - - if (n - lo > hi - m) { - fpush ( lo, n ); - fpush ( m, hi ); - } else { - fpush ( m, hi ); - fpush ( lo, n ); - } - } -} - -#undef fmin -#undef fpush -#undef fpop -#undef fswap -#undef fvswap -#undef FALLBACK_QSORT_SMALL_THRESH -#undef FALLBACK_QSORT_STACK_SIZE - - -/*---------------------------------------------*/ -/* Pre: - nblock > 0 - eclass exists for [0 .. nblock-1] - ((UChar*)eclass) [0 .. nblock-1] holds block - ptr exists for [0 .. nblock-1] - - Post: - ((UChar*)eclass) [0 .. nblock-1] holds block - All other areas of eclass destroyed - fmap [0 .. nblock-1] holds sorted order - bhtab [ 0 .. 
2+(nblock/32) ] destroyed -*/ - -#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) -#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) -#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) -#define WORD_BH(zz) bhtab[(zz) >> 5] -#define UNALIGNED_BH(zz) ((zz) & 0x01f) - -static -void fallbackSort ( UInt32* fmap, - UInt32* eclass, - UInt32* bhtab, - Int32 nblock, - Int32 verb ) -{ - Int32 ftab[257]; - Int32 ftabCopy[256]; - Int32 H, i, j, k, l, r, cc, cc1; - Int32 nNotDone; - Int32 nBhtab; - UChar* eclass8 = (UChar*)eclass; - - /*-- - Initial 1-char radix sort to generate - initial fmap and initial BH bits. - --*/ - if (verb >= 4) - VPrintf0 ( " bucket sorting ...\n" ); - for (i = 0; i < 257; i++) ftab[i] = 0; - for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; - for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; - for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; - - for (i = 0; i < nblock; i++) { - j = eclass8[i]; - k = ftab[j] - 1; - ftab[j] = k; - fmap[k] = i; - } - - nBhtab = 2 + (nblock / 32); - for (i = 0; i < nBhtab; i++) bhtab[i] = 0; - for (i = 0; i < 256; i++) SET_BH(ftab[i]); - - /*-- - Inductively refine the buckets. Kind-of an - "exponential radix sort" (!), inspired by the - Manber-Myers suffix array construction algorithm. 
- --*/ - - /*-- set sentinel bits for block-end detection --*/ - for (i = 0; i < 32; i++) { - SET_BH(nblock + 2*i); - CLEAR_BH(nblock + 2*i + 1); - } - - /*-- the log(N) loop --*/ - H = 1; - while (1) { - - if (verb >= 4) - VPrintf1 ( " depth %6d has ", H ); - - j = 0; - for (i = 0; i < nblock; i++) { - if (ISSET_BH(i)) j = i; - k = fmap[i] - H; if (k < 0) k += nblock; - eclass[k] = j; - } - - nNotDone = 0; - r = -1; - while (1) { - - /*-- find the next non-singleton bucket --*/ - k = r + 1; - while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; - if (ISSET_BH(k)) { - while (WORD_BH(k) == 0xffffffff) k += 32; - while (ISSET_BH(k)) k++; - } - l = k - 1; - if (l >= nblock) break; - while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; - if (!ISSET_BH(k)) { - while (WORD_BH(k) == 0x00000000) k += 32; - while (!ISSET_BH(k)) k++; - } - r = k - 1; - if (r >= nblock) break; - - /*-- now [l, r] bracket current bucket --*/ - if (r > l) { - nNotDone += (r - l + 1); - fallbackQSort3 ( fmap, eclass, l, r ); - - /*-- scan bucket and generate header bits-- */ - cc = -1; - for (i = l; i <= r; i++) { - cc1 = eclass[fmap[i]]; - if (cc != cc1) { SET_BH(i); cc = cc1; }; - } - } - } - - if (verb >= 4) - VPrintf1 ( "%6d unresolved strings\n", nNotDone ); - - H *= 2; - if (H > nblock || nNotDone == 0) break; - } - - /*-- - Reconstruct the original block in - eclass8 [0 .. nblock-1], since the - previous phase destroyed it. - --*/ - if (verb >= 4) - VPrintf0 ( " reconstructing block ...\n" ); - j = 0; - for (i = 0; i < nblock; i++) { - while (ftabCopy[j] == 0) j++; - ftabCopy[j]--; - eclass8[fmap[i]] = (UChar)j; - } - AssertH ( j < 256, 1005 ); -} - -#undef SET_BH -#undef CLEAR_BH -#undef ISSET_BH -#undef WORD_BH -#undef UNALIGNED_BH - - -/*---------------------------------------------*/ -/*--- The main, O(N^2 log(N)) sorting ---*/ -/*--- algorithm. Faster for "normal" ---*/ -/*--- non-repetitive blocks. 
---*/ -/*---------------------------------------------*/ - -/*---------------------------------------------*/ -static -__inline__ -Bool mainGtU ( UInt32 i1, - UInt32 i2, - UChar* block, - UInt16* quadrant, - UInt32 nblock, - Int32* budget ) -{ - Int32 k; - UChar c1, c2; - UInt16 s1, s2; - - AssertD ( i1 != i2, "mainGtU" ); - /* 1 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 2 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 3 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 4 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 5 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 6 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 7 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 8 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 9 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 10 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 11 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 12 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - - k = nblock + 8; - - do { - /* 1 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - /* 2 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - /* 3 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - /* 4 */ - c1 = block[i1]; c2 = 
block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - /* 5 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - /* 6 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - /* 7 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - /* 8 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - - if (i1 >= nblock) i1 -= nblock; - if (i2 >= nblock) i2 -= nblock; - - k -= 8; - (*budget)--; - } - while (k >= 0); - - return False; -} - - -/*---------------------------------------------*/ -/*-- - Knuth's increments seem to work better - than Incerpi-Sedgewick here. Possibly - because the number of elems to sort is - usually small, typically <= 20. 
---*/ -static -Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, - 9841, 29524, 88573, 265720, - 797161, 2391484 }; - -static -void mainSimpleSort ( UInt32* ptr, - UChar* block, - UInt16* quadrant, - Int32 nblock, - Int32 lo, - Int32 hi, - Int32 d, - Int32* budget ) -{ - Int32 i, j, h, bigN, hp; - UInt32 v; - - bigN = hi - lo + 1; - if (bigN < 2) return; - - hp = 0; - while (incs[hp] < bigN) hp++; - hp--; - - for (; hp >= 0; hp--) { - h = incs[hp]; - - i = lo + h; - while (True) { - - /*-- copy 1 --*/ - if (i > hi) break; - v = ptr[i]; - j = i; - while ( mainGtU ( - ptr[j-h]+d, v+d, block, quadrant, nblock, budget - ) ) { - ptr[j] = ptr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - ptr[j] = v; - i++; - - /*-- copy 2 --*/ - if (i > hi) break; - v = ptr[i]; - j = i; - while ( mainGtU ( - ptr[j-h]+d, v+d, block, quadrant, nblock, budget - ) ) { - ptr[j] = ptr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - ptr[j] = v; - i++; - - /*-- copy 3 --*/ - if (i > hi) break; - v = ptr[i]; - j = i; - while ( mainGtU ( - ptr[j-h]+d, v+d, block, quadrant, nblock, budget - ) ) { - ptr[j] = ptr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - ptr[j] = v; - i++; - - if (*budget < 0) return; - } - } -} - - -/*---------------------------------------------*/ -/*-- - The following is an implementation of - an elegant 3-way quicksort for strings, - described in a paper "Fast Algorithms for - Sorting and Searching Strings", by Robert - Sedgewick and Jon L. Bentley. 
---*/ - -#define mswap(zz1, zz2) \ - { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } - -#define mvswap(zzp1, zzp2, zzn) \ -{ \ - Int32 yyp1 = (zzp1); \ - Int32 yyp2 = (zzp2); \ - Int32 yyn = (zzn); \ - while (yyn > 0) { \ - mswap(ptr[yyp1], ptr[yyp2]); \ - yyp1++; yyp2++; yyn--; \ - } \ -} - -static -__inline__ -UChar mmed3 ( UChar a, UChar b, UChar c ) -{ - UChar t; - if (a > b) { t = a; a = b; b = t; }; - if (b > c) { - b = c; - if (a > b) b = a; - } - return b; -} - -#define mmin(a,b) ((a) < (b)) ? (a) : (b) - -#define mpush(lz,hz,dz) { stackLo[sp] = lz; \ - stackHi[sp] = hz; \ - stackD [sp] = dz; \ - sp++; } - -#define mpop(lz,hz,dz) { sp--; \ - lz = stackLo[sp]; \ - hz = stackHi[sp]; \ - dz = stackD [sp]; } - - -#define mnextsize(az) (nextHi[az]-nextLo[az]) - -#define mnextswap(az,bz) \ - { Int32 tz; \ - tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ - tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ - tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } - - -#define MAIN_QSORT_SMALL_THRESH 20 -#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) -#define MAIN_QSORT_STACK_SIZE 100 - -static -void mainQSort3 ( UInt32* ptr, - UChar* block, - UInt16* quadrant, - Int32 nblock, - Int32 loSt, - Int32 hiSt, - Int32 dSt, - Int32* budget ) -{ - Int32 unLo, unHi, ltLo, gtHi, n, m, med; - Int32 sp, lo, hi, d; - - Int32 stackLo[MAIN_QSORT_STACK_SIZE]; - Int32 stackHi[MAIN_QSORT_STACK_SIZE]; - Int32 stackD [MAIN_QSORT_STACK_SIZE]; - - Int32 nextLo[3]; - Int32 nextHi[3]; - Int32 nextD [3]; - - sp = 0; - mpush ( loSt, hiSt, dSt ); - - while (sp > 0) { - - AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 ); - - mpop ( lo, hi, d ); - if (hi - lo < MAIN_QSORT_SMALL_THRESH || - d > MAIN_QSORT_DEPTH_THRESH) { - mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); - if (*budget < 0) return; - continue; - } - - med = (Int32) - mmed3 ( block[ptr[ lo ]+d], - block[ptr[ hi ]+d], - block[ptr[ (lo+hi)>>1 ]+d] ); - - unLo = ltLo = lo; - unHi 
= gtHi = hi; - - while (True) { - while (True) { - if (unLo > unHi) break; - n = ((Int32)block[ptr[unLo]+d]) - med; - if (n == 0) { - mswap(ptr[unLo], ptr[ltLo]); - ltLo++; unLo++; continue; - }; - if (n > 0) break; - unLo++; - } - while (True) { - if (unLo > unHi) break; - n = ((Int32)block[ptr[unHi]+d]) - med; - if (n == 0) { - mswap(ptr[unHi], ptr[gtHi]); - gtHi--; unHi--; continue; - }; - if (n < 0) break; - unHi--; - } - if (unLo > unHi) break; - mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; - } - - AssertD ( unHi == unLo-1, "mainQSort3(2)" ); - - if (gtHi < ltLo) { - mpush(lo, hi, d+1 ); - continue; - } - - n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); - m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); - - n = lo + unLo - ltLo - 1; - m = hi - (gtHi - unHi) + 1; - - nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; - nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; - nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; - - if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); - if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); - if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); - - AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); - AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); - - mpush (nextLo[0], nextHi[0], nextD[0]); - mpush (nextLo[1], nextHi[1], nextD[1]); - mpush (nextLo[2], nextHi[2], nextD[2]); - } -} - -#undef mswap -#undef mvswap -#undef mpush -#undef mpop -#undef mmin -#undef mnextsize -#undef mnextswap -#undef MAIN_QSORT_SMALL_THRESH -#undef MAIN_QSORT_DEPTH_THRESH -#undef MAIN_QSORT_STACK_SIZE - - -/*---------------------------------------------*/ -/* Pre: - nblock > N_OVERSHOOT - block32 exists for [0 .. nblock-1 +N_OVERSHOOT] - ((UChar*)block32) [0 .. nblock-1] holds block - ptr exists for [0 .. nblock-1] - - Post: - ((UChar*)block32) [0 .. nblock-1] holds block - All other areas of block32 destroyed - ftab [0 .. 65536 ] destroyed - ptr [0 .. 
nblock-1] holds sorted order - if (*budget < 0), sorting was abandoned -*/ - -#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) -#define SETMASK (1 << 21) -#define CLEARMASK (~(SETMASK)) - -static -void mainSort ( UInt32* ptr, - UChar* block, - UInt16* quadrant, - UInt32* ftab, - Int32 nblock, - Int32 verb, - Int32* budget ) -{ - Int32 i, j, k, ss, sb; - Int32 runningOrder[256]; - Bool bigDone[256]; - Int32 copyStart[256]; - Int32 copyEnd [256]; - UChar c1; - Int32 numQSorted; - UInt16 s; - if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); - - /*-- set up the 2-byte frequency table --*/ - for (i = 65536; i >= 0; i--) ftab[i] = 0; - - j = block[0] << 8; - i = nblock-1; - for (; i >= 3; i -= 4) { - quadrant[i] = 0; - j = (j >> 8) | ( ((UInt16)block[i]) << 8); - ftab[j]++; - quadrant[i-1] = 0; - j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); - ftab[j]++; - quadrant[i-2] = 0; - j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); - ftab[j]++; - quadrant[i-3] = 0; - j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); - ftab[j]++; - } - for (; i >= 0; i--) { - quadrant[i] = 0; - j = (j >> 8) | ( ((UInt16)block[i]) << 8); - ftab[j]++; - } - - /*-- (emphasises close relationship of block & quadrant) --*/ - for (i = 0; i < BZ_N_OVERSHOOT; i++) { - block [nblock+i] = block[i]; - quadrant[nblock+i] = 0; - } - - if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); - - /*-- Complete the initial radix sort --*/ - for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; - - s = block[0] << 8; - i = nblock-1; - for (; i >= 3; i -= 4) { - s = (s >> 8) | (block[i] << 8); - j = ftab[s] -1; - ftab[s] = j; - ptr[j] = i; - s = (s >> 8) | (block[i-1] << 8); - j = ftab[s] -1; - ftab[s] = j; - ptr[j] = i-1; - s = (s >> 8) | (block[i-2] << 8); - j = ftab[s] -1; - ftab[s] = j; - ptr[j] = i-2; - s = (s >> 8) | (block[i-3] << 8); - j = ftab[s] -1; - ftab[s] = j; - ptr[j] = i-3; - } - for (; i >= 0; i--) { - s = (s >> 8) | (block[i] << 8); - j = ftab[s] -1; - ftab[s] = j; - ptr[j] = i; - } - - /*-- - 
Now ftab contains the first loc of every small bucket. - Calculate the running order, from smallest to largest - big bucket. - --*/ - for (i = 0; i <= 255; i++) { - bigDone [i] = False; - runningOrder[i] = i; - } - - { - Int32 vv; - Int32 h = 1; - do h = 3 * h + 1; while (h <= 256); - do { - h = h / 3; - for (i = h; i <= 255; i++) { - vv = runningOrder[i]; - j = i; - while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { - runningOrder[j] = runningOrder[j-h]; - j = j - h; - if (j <= (h - 1)) goto zero; - } - zero: - runningOrder[j] = vv; - } - } while (h != 1); - } - - /*-- - The main sorting loop. - --*/ - - numQSorted = 0; - - for (i = 0; i <= 255; i++) { - - /*-- - Process big buckets, starting with the least full. - Basically this is a 3-step process in which we call - mainQSort3 to sort the small buckets [ss, j], but - also make a big effort to avoid the calls if we can. - --*/ - ss = runningOrder[i]; - - /*-- - Step 1: - Complete the big bucket [ss] by quicksorting - any unsorted small buckets [ss, j], for j != ss. - Hopefully previous pointer-scanning phases have already - completed many of the small buckets [ss, j], so - we don't have to sort them at all. - --*/ - for (j = 0; j <= 255; j++) { - if (j != ss) { - sb = (ss << 8) + j; - if ( ! (ftab[sb] & SETMASK) ) { - Int32 lo = ftab[sb] & CLEARMASK; - Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; - if (hi > lo) { - if (verb >= 4) - VPrintf4 ( " qsort [0x%x, 0x%x] " - "done %d this %d\n", - ss, j, numQSorted, hi - lo + 1 ); - mainQSort3 ( - ptr, block, quadrant, nblock, - lo, hi, BZ_N_RADIX, budget - ); - numQSorted += (hi - lo + 1); - if (*budget < 0) return; - } - } - ftab[sb] |= SETMASK; - } - } - - AssertH ( !bigDone[ss], 1006 ); - - /*-- - Step 2: - Now scan this big bucket [ss] so as to synthesise the - sorted order for small buckets [t, ss] for all t, - including, magically, the bucket [ss,ss] too. - This will avoid doing Real Work in subsequent Step 1's. 
- --*/ - { - for (j = 0; j <= 255; j++) { - copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; - copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; - } - for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { - k = ptr[j]-1; if (k < 0) k += nblock; - c1 = block[k]; - if (!bigDone[c1]) - ptr[ copyStart[c1]++ ] = k; - } - for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { - k = ptr[j]-1; if (k < 0) k += nblock; - c1 = block[k]; - if (!bigDone[c1]) - ptr[ copyEnd[c1]-- ] = k; - } - } - - AssertH ( (copyStart[ss]-1 == copyEnd[ss]) - || - /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. - Necessity for this case is demonstrated by compressing - a sequence of approximately 48.5 million of character - 251; 1.0.0/1.0.1 will then die here. */ - (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), - 1007 ) - - for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; - - /*-- - Step 3: - The [ss] big bucket is now done. Record this fact, - and update the quadrant descriptors. Remember to - update quadrants in the overshoot area too, if - necessary. The "if (i < 255)" test merely skips - this updating for the last bucket processed, since - updating for the last bucket is pointless. - - The quadrant array provides a way to incrementally - cache sort orderings, as they appear, so as to - make subsequent comparisons in fullGtU() complete - faster. For repetitive blocks this makes a big - difference (but not big enough to be able to avoid - the fallback sorting mechanism, exponential radix sort). - - The precise meaning is: at all times: - - for 0 <= i < nblock and 0 <= j <= nblock - - if block[i] != block[j], - - then the relative values of quadrant[i] and - quadrant[j] are meaningless. 
- - else { - if quadrant[i] < quadrant[j] - then the string starting at i lexicographically - precedes the string starting at j - - else if quadrant[i] > quadrant[j] - then the string starting at j lexicographically - precedes the string starting at i - - else - the relative ordering of the strings starting - at i and j has not yet been determined. - } - --*/ - bigDone[ss] = True; - - if (i < 255) { - Int32 bbStart = ftab[ss << 8] & CLEARMASK; - Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; - Int32 shifts = 0; - - while ((bbSize >> shifts) > 65534) shifts++; - - for (j = bbSize-1; j >= 0; j--) { - Int32 a2update = ptr[bbStart + j]; - UInt16 qVal = (UInt16)(j >> shifts); - quadrant[a2update] = qVal; - if (a2update < BZ_N_OVERSHOOT) - quadrant[a2update + nblock] = qVal; - } - AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); - } - - } - - if (verb >= 4) - VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", - nblock, numQSorted, nblock - numQSorted ); -} - -#undef BIGFREQ -#undef SETMASK -#undef CLEARMASK - - -/*---------------------------------------------*/ -/* Pre: - nblock > 0 - arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] - ((UChar*)arr2) [0 .. nblock-1] holds block - arr1 exists for [0 .. nblock-1] - - Post: - ((UChar*)arr2) [0 .. nblock-1] holds block - All other areas of block destroyed - ftab [ 0 .. 65536 ] destroyed - arr1 [0 .. nblock-1] holds sorted order -*/ -void BZ2_blockSort ( EState* s ) -{ - UInt32* ptr = s->ptr; - UChar* block = s->block; - UInt32* ftab = s->ftab; - Int32 nblock = s->nblock; - Int32 verb = s->verbosity; - Int32 wfact = s->workFactor; - UInt16* quadrant; - Int32 budget; - Int32 budgetInit; - Int32 i; - - if (nblock < 10000) { - fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); - } else { - /* Calculate the location for quadrant, remembering to get - the alignment right. Assumes that &(block[0]) is at least - 2-byte aligned -- this should be ok since block is really - the first section of arr2. 
- */ - i = nblock+BZ_N_OVERSHOOT; - if (i & 1) i++; - quadrant = (UInt16*)(&(block[i])); - - /* (wfact-1) / 3 puts the default-factor-30 - transition point at very roughly the same place as - with v0.1 and v0.9.0. - Not that it particularly matters any more, since the - resulting compressed stream is now the same regardless - of whether or not we use the main sort or fallback sort. - */ - if (wfact < 1 ) wfact = 1; - if (wfact > 100) wfact = 100; - budgetInit = nblock * ((wfact-1) / 3); - budget = budgetInit; - - mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); - if (verb >= 3) - VPrintf3 ( " %d work, %d block, ratio %5.2f\n", - budgetInit - budget, - nblock, - (float)(budgetInit - budget) / - (float)(nblock==0 ? 1 : nblock) ); - if (budget < 0) { - if (verb >= 2) - VPrintf0 ( " too repetitive; using fallback" - " sorting algorithm\n" ); - fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); - } - } - - s->origPtr = -1; - for (i = 0; i < s->nblock; i++) - if (ptr[i] == 0) - { s->origPtr = i; break; }; - - AssertH( s->origPtr != -1, 1003 ); -} - - -/*-------------------------------------------------------------*/ -/*--- end blocksort.c ---*/ -/*-------------------------------------------------------------*/ diff --git a/bunzip2.1 b/bunzip2.1 new file mode 100644 index 0000000..ced3a73 --- /dev/null +++ b/bunzip2.1 @@ -0,0 +1,362 @@ +.TH BUNZIP2 1 "9 June 2003" +.SH NAME +bunzip2 \- a block-sorting file decompressor, v1.0.2gs1 +.br +bzcat \- decompresses files to stdout +.br +bzip2recover \- recovers data from damaged bzip2 files + +.SH SYNOPSIS +.br +.B bunzip2 +.RB [ " \-fkvsVL " ] +[ +.I "filenames \&..." +] +.br +.B bzcat +.RB [ " \-s " ] +[ +.I "filenames \&..." +] +.br +.B bzip2recover +.I "filename" + +.SH DESCRIPTION +.I bunzip2 +decompresses files created by +.I bzip2 +using the Burrows-Wheeler block sorting +text compression algorithm, and Huffman coding. 
+.I bzip2 +generally achieves +considerably better compression than that achieved by more conventional +LZ77/LZ78-based compressors, and approaches the performance of the PPM +family of statistical compressors. +.LP +The command-line options are deliberately very similar to +those of +.I GNU +.I gunzip, +but they are not identical. +.LP +.I bunzip2 +will by default not overwrite existing +files. If you want this to happen, specify the \-f flag. +.LP +.I bunzip2 +decompresses all specified files. Files which were not created by +.I bzip2 +will be detected and ignored, and a warning issued. +.I bunzip2 +attempts to guess the filename for the decompressed file +from that of the compressed file as follows: +.LP +.nf + filename.bz2 becomes filename + filename.bz becomes filename + filename.tbz2 becomes filename.tar + filename.tbz becomes filename.tar + anyothername becomes anyothername.out +.fi +.LP +If the file does not end in one of the recognised endings, +.I .bz2, +.I .bz, +.I .tbz2 +or +.I .tbz, +.I bunzip2 +complains that it cannot +guess the name of the original file, and uses the original name +with +.I .out +appended. +.LP +Supplying no filenames causes decompression from +standard input to standard output. +.LP +File name handling is +naive in the sense that there is no mechanism for preserving original +file names, permissions, ownerships or dates in operating systems or +filesystems which lack these concepts, or have serious file name length +restrictions, such as MS-DOS or GS/OS. +.LP +.I bunzip2 +will correctly decompress a file which is the +concatenation of two or more compressed files. The result is the +concatenation of the corresponding uncompressed files. Integrity +testing (\-t) +of concatenated +compressed files is also supported. +.LP +You can also decompress files to the standard output by +giving the \-c flag. Multiple files may be +decompressed like this. The resulting outputs are fed sequentially to stdout. 
+.LP +.I bzcat +(or +.I bunzip2 +.I \-c) +decompresses all specified files to +the standard output. +.LP +.I bunzip2 +will read arguments from the environment variables +.I BZIP2 +and +.I BZIP, +in that order, and will process them +before any arguments read from the command line. This gives a +convenient way to supply default arguments. +.LP +As a self-check for your protection, +.I bzip2 +and +.I bunzip2 +use 32-bit CRCs to +make sure that the decompressed version of a file is identical to the +original. This guards against corruption of the compressed data, and +against undetected bugs in +.I bzip2 +and +.I bunzip2 +(hopefully very unlikely). The +chances of data corruption going undetected are microscopic, about one +chance in four billion for each file processed. Be aware, though, that +the check occurs upon decompression, so it can only tell you that +something is wrong. It can't help you +recover the original uncompressed +data. You can use +.I bzip2recover +to try to recover data from +damaged files. +.LP +This manual page pertains to version 1.0.2gs1 of +.I bunzip2. +It is fully compatible with compressed data created with all of the previous +public releases of bzip2, versions +0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, as well as version 1.0.2. +.LP +Return values: 0 for a normal exit, 1 for environmental problems (file +not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt +compressed file, 3 for an internal consistency error (eg, bug) which +caused +.I bunzip2 +to panic. +.LP +.SH OPTIONS +.IP "\fB\-c\fP \fB\--stdout\fP" +Decompress to standard output. + +.IP "\fB\-d\fP \fB\--decompress\fP" +Force decompression. This flag is unnecessary on bunzip2 for GNO, +since it always decompresses. + +.IP "\fB\-t\fP \fB\--test\fP" +Check integrity of the specified file(s), but don't decompress them. +This really performs a trial decompression and throws away the result. + +.IP "\fB\-f\fP \fB\--force\fP" +Force overwrite of output files.
Normally, +.I bunzip2 +will not overwrite +existing output files. +.sp +.I bunzip2 +normally declines to decompress files which don't have the +correct magic header bytes. If forced (-f), however, it will pass +such files through unmodified. This is how GNU gzip behaves. + +.IP "\fB\-k\fP \fB\--keep\fP" +Keep (don't delete) input files during decompression. + +.IP "\fB\-s\fP \fB\--small\fP" +Reduce memory usage, for decompression and testing. Files +are decompressed and tested using a modified algorithm which only +requires 2.5 bytes per block byte. This means any file can be +decompressed in 2300k of memory, albeit at about half the normal speed. +.sp +In short, if your machine is low on memory (5 megabytes or +less), you will probably need to use \-s. See MEMORY MANAGEMENT below. + +.IP "\fB\-q\fP \fB\--quiet\fP" +Suppress non-essential warning messages. Messages pertaining to +I/O errors and other critical events will not be suppressed. + +.IP "\fB\-v\fP \fB\--verbose\fP" +Verbose mode -- show the compression ratio for each file processed. +Further \-v's increase the verbosity level, spewing out lots of +information which is primarily of interest for diagnostic purposes. + +.IP "\fB\-L\fP \fB\--license\fP \fB\-V\fP \fB\--version\fP" +Display the software version, license terms and conditions. + +.IP "\fB\--\fP" +Treats all subsequent arguments as file names, even if they start +with a dash. This is so you can handle files with names beginning +with a dash, for example: bunzip2 \-- \-myfilename. +.LP +.SH MEMORY MANAGEMENT +.I bzip2 +compresses large files in blocks. The block size affects +both the compression ratio achieved, and the amount of memory needed for +compression and decompression. The block size can be specified +to be 100,000 bytes through 900,000 bytes (the +default). 
At decompression time, the block size used for +compression is read from the header of the compressed file, and +.I bunzip2 +then allocates itself just enough memory to decompress +the file. +.LP +Decompression requirements, in bytes, can be estimated as: +.LP +.nf + 100k + ( 4 x block size ), or + 100k + ( 2.5 x block size ) if using \-s +.fi +.LP +For files compressed with the default 900k block size, +.I bunzip2 +will require about 3700 kbytes to decompress. To support decompression +of any file on a 4 megabyte machine, +.I bunzip2 +has an option to +decompress using approximately half this amount of memory, about 2300 +kbytes. Decompression speed is also halved, so you should use this +option only where necessary. The relevant flag is -s. +.LP +Decompression speeds are virtually unaffected by block size. +.LP +Another significant point applies to files which fit in a single block +-- that means most files you'd encounter using a large block size. The +amount of real memory touched is proportional to the size of the file, +since the file is smaller than a block. For example, compressing a file +20,000 bytes long with a 900k block size will cause the decompressor to +allocate 3700k but only touch 100k + 20000 * 4 = 180 kbytes +when decompressing it. +.LP +Here is a table which summarises the maximum memory usage for different +block sizes. Also recorded is the total compressed size for 14 files of +the Calgary Text Compression Corpus totalling 3,141,622 bytes. This +column gives some feel for how compression varies with block size. +These figures tend to understate the advantage of larger block sizes for +larger files, since the Corpus is dominated by smaller files. 
+.LP +.nf + Block Decompress Decompress Corpus + Size usage -s usage Size +.fi +.LP +.nf + 100k 500k 350k 914704 + 200k 900k 600k 877703 + 300k 1300k 850k 860338 + 400k 1700k 1100k 846899 + 500k 2100k 1350k 845160 + 600k 2500k 1600k 838626 + 700k 2900k 1850k 834096 + 800k 3300k 2100k 828642 + 900k 3700k 2350k 828642 +.fi +.LP +.SH RECOVERING DATA FROM DAMAGED FILES +.I bzip2 +compresses files in blocks, usually 900kbytes long. Each +block is handled independently. If a media or transmission error causes +a multi-block .bz2 +file to become damaged, it may be possible to +recover data from the undamaged blocks in the file. +.LP +The compressed representation of each block is delimited by a 48-bit +pattern, which makes it possible to find the block boundaries with +reasonable certainty. Each block also carries its own 32-bit CRC, so +damaged blocks can be distinguished from undamaged ones. +.LP +.I bzip2recover +is a simple program whose purpose is to search for blocks in .bz2 files, +and write each block out into its own .bz2 file. You can then use +.I bunzip2 +\-t +to test the +integrity of the resulting files, and decompress those which are +undamaged. +.LP +.I bzip2recover +takes a single argument, the name of the damaged file, +and writes a number of files named "rec0001file.bz2", +"rec0002file.bz2", etc, containing the extracted blocks. +The output filenames are designed so that the use of +wildcards in subsequent processing -- for example, +"bunzip2 -c rec*file.bz2 > recovered_data" -- processes the files in +the correct order. +.LP +.I bzip2recover +should be of most use dealing with large .bz2 +files, as these will contain many blocks. It is clearly +futile to use it on damaged single-block files, since a +damaged block cannot be recovered. If you wish to minimise +any potential data loss through media or transmission errors, +you might consider compressing with a smaller +block size. 
+.LP +.SH PERFORMANCE NOTES +.I bunzip2 +usually allocates several megabytes of memory to operate +in, and then charges all over it in a fairly random fashion. This means +that performance is largely determined by the speed at which your machine can +access main memory or (if you have a caching accelerator) serve cache misses. +Because of this, small changes to the code to reduce the miss rate have +been observed to give disproportionately large performance improvements. +I imagine that +.I bunzip2 +will perform best on machines with very large caches. +.LP +.SH CAVEATS +I/O error messages are not as helpful as they could be. +.I bunzip2 +tries hard to detect I/O errors and exit cleanly, but the details of +what the problem is sometimes seem rather misleading. +.LP +.I bzip2recover +for GNO uses 32-bit integers to represent bit positions in compressed files, +so it cannot handle compressed files more than 512 megabytes long. +.LP + +.SH AUTHOR +Julian Seward, jseward@acm.org. +.LP +http://sources.redhat.com/bzip2 +.LP +The ideas embodied in +.I bzip2 +are due to (at least) the following +people: Michael Burrows and David Wheeler (for the block sorting +transformation), David Wheeler (again, for the Huffman coder), Peter +Fenwick (for the structured coding model in the original +.I bzip, +and many refinements), and Alistair Moffat, Radford Neal and Ian Witten +(for the arithmetic coder in the original +.I bzip). +I am much +indebted for their help, support and advice. See the manual in the +source distribution for pointers to sources of documentation. Christian +von Roques encouraged me to look for faster sorting algorithms, so as to +speed up compression. Bela Lubkin encouraged me to improve the +worst-case compression performance. Many people sent patches, helped +with portability problems, lent machines, gave advice and were generally +helpful. 
+.LP +This version of +.I bunzip2 +for GNO has been ported by Stephen Heumann from +Julian Seward's +.I bzip2 +version 1.0.2 for other platforms. +.LP +This program contains material from the ORCA/C Run-Time Libraries, +copyright 1987-1996 by Byte Works, Inc. Used with permission. +.LP +It also incorporates a public domain stristr routine by Fred Cole, +Bob Stout, and Greg Thayer, which was obtained from http://www.snippets.org . diff --git a/bunzip2.desc b/bunzip2.desc new file mode 100644 index 0000000..cfd7d3f --- /dev/null +++ b/bunzip2.desc @@ -0,0 +1,13 @@ +Name: bunzip2 +Version: 1.0.2 +Shell: GNO/ME +Author: Stephen Heumann (GNO port of original code by Julian Seward) +Contact: sheumann@myrealbox.com +Where: /usr/local/bin +FTP: ftp.gno.org + + Decompression program for files compressed in the bzip2 format. Based +on Julian Seward's bzip2 program, but only supports file decompression and +testing, not compression. Can also be used as bzcat, writing decompressed +data to stdout. Also includes bzip2recover program for restoring data +from partially corrupted bzip2 archives. diff --git a/bunzip2.rez b/bunzip2.rez new file mode 100644 index 0000000..51753cf --- /dev/null +++ b/bunzip2.rez @@ -0,0 +1,15 @@ +#include "/lang/orca/libraries/rinclude/Types.Rez" + +resource rVersion (0x1, purgeable3, nocrossbank) { + + { 1, 0, 2, /* version 1.0.2 */ + release, /* development|alpha|beta|final|release */ + 0 /* non-final release number */ + }, + verUS, /* country code -- only some are avail */ + "bunzip2", /* name */ + /* _Very_ brief description. Check "file info" */ + /* shown in the Finder to see if it's too long */ + /* Note that \n is used to separate lines here.
*/ + "Bzip2 archive decompression program\n" +}; diff --git a/bzcat.1 b/bzcat.1 new file mode 100644 index 0000000..9f972a0 --- /dev/null +++ b/bzcat.1 @@ -0,0 +1 @@ +.so man1/bunzip2.1 diff --git a/bzdiff b/bzdiff deleted file mode 100644 index 3c2eb85..0000000 --- a/bzdiff +++ /dev/null @@ -1,76 +0,0 @@ -#!/bin/sh -# sh is buggy on RS/6000 AIX 3.2. Replace above line with #!/bin/ksh - -# Bzcmp/diff wrapped for bzip2, -# adapted from zdiff by Philippe Troin for Debian GNU/Linux. - -# Bzcmp and bzdiff are used to invoke the cmp or the diff pro- -# gram on compressed files. All options specified are passed -# directly to cmp or diff. If only 1 file is specified, then -# the files compared are file1 and an uncompressed file1.gz. -# If two files are specified, then they are uncompressed (if -# necessary) and fed to cmp or diff. The exit status from cmp -# or diff is preserved. - -PATH="/usr/bin:$PATH"; export PATH -prog=`echo $0 | sed 's|.*/||'` -case "$prog" in - *cmp) comp=${CMP-cmp} ;; - *) comp=${DIFF-diff} ;; -esac - -OPTIONS= -FILES= -for ARG -do - case "$ARG" in - -*) OPTIONS="$OPTIONS $ARG";; - *) if test -f "$ARG"; then - FILES="$FILES $ARG" - else - echo "${prog}: $ARG not found or not a regular file" - exit 1 - fi ;; - esac -done -if test -z "$FILES"; then - echo "Usage: $prog [${comp}_options] file [file]" - exit 1 -fi -tmp=`tempfile -d /tmp -p bz` || { - echo 'cannot create a temporary file' >&2 - exit 1 -} -set $FILES -if test $# -eq 1; then - FILE=`echo "$1" | sed 's/.bz2$//'` - bzip2 -cd "$FILE.bz2" | $comp $OPTIONS - "$FILE" - STAT="$?" - -elif test $# -eq 2; then - case "$1" in - *.bz2) - case "$2" in - *.bz2) - F=`echo "$2" | sed 's|.*/||;s|.bz2$||'` - bzip2 -cdfq "$2" > $tmp - bzip2 -cdfq "$1" | $comp $OPTIONS - $tmp - STAT="$?" 
- /bin/rm -f $tmp;; - - *) bzip2 -cdfq "$1" | $comp $OPTIONS - "$2" - STAT="$?";; - esac;; - *) case "$2" in - *.bz2) - bzip2 -cdfq "$2" | $comp $OPTIONS "$1" - - STAT="$?";; - *) $comp $OPTIONS "$1" "$2" - STAT="$?";; - esac;; - esac - exit "$STAT" -else - echo "Usage: $prog [${comp}_options] file [file]" - exit 1 -fi diff --git a/bzdiff.1 b/bzdiff.1 deleted file mode 100644 index adb7a8e..0000000 --- a/bzdiff.1 +++ /dev/null @@ -1,47 +0,0 @@ -\"Shamelessly copied from zmore.1 by Philippe Troin -\"for Debian GNU/Linux -.TH BZDIFF 1 -.SH NAME -bzcmp, bzdiff \- compare bzip2 compressed files -.SH SYNOPSIS -.B bzcmp -[ cmp_options ] file1 -[ file2 ] -.br -.B bzdiff -[ diff_options ] file1 -[ file2 ] -.SH DESCRIPTION -.I Bzcmp -and -.I bzdiff -are used to invoke the -.I cmp -or the -.I diff -program on bzip2 compressed files. All options specified are passed -directly to -.I cmp -or -.IR diff "." -If only 1 file is specified, then the files compared are -.I file1 -and an uncompressed -.IR file1 ".bz2." -If two files are specified, then they are uncompressed if necessary and fed to -.I cmp -or -.IR diff "." -The exit status from -.I cmp -or -.I diff -is preserved. -.SH "SEE ALSO" -cmp(1), diff(1), bzmore(1), bzless(1), bzgrep(1), bzip2(1) -.SH BUGS -Messages from the -.I cmp -or -.I diff -programs refer to temporary filenames instead of those specified. diff --git a/bzgrep b/bzgrep deleted file mode 100644 index dbfc00e..0000000 --- a/bzgrep +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/sh - -# Bzgrep wrapped for bzip2, -# adapted from zgrep by Philippe Troin for Debian GNU/Linux. 
-## zgrep notice: -## zgrep -- a wrapper around a grep program that decompresses files as needed -## Adapted from a version sent by Charles Levert - -PATH="/usr/bin:$PATH"; export PATH - -prog=`echo $0 | sed 's|.*/||'` -case "$prog" in - *egrep) grep=${EGREP-egrep} ;; - *fgrep) grep=${FGREP-fgrep} ;; - *) grep=${GREP-grep} ;; -esac -pat="" -while test $# -ne 0; do - case "$1" in - -e | -f) opt="$opt $1"; shift; pat="$1" - if test "$grep" = grep; then # grep is buggy with -e on SVR4 - grep=egrep - fi;; - -A | -B) opt="$opt $1 $2"; shift;; - -*) opt="$opt $1";; - *) if test -z "$pat"; then - pat="$1" - else - break; - fi;; - esac - shift -done - -if test -z "$pat"; then - echo "grep through bzip2 files" - echo "usage: $prog [grep_options] pattern [files]" - exit 1 -fi - -list=0 -silent=0 -op=`echo "$opt" | sed -e 's/ //g' -e 's/-//g'` -case "$op" in - *l*) list=1 -esac -case "$op" in - *h*) silent=1 -esac - -if test $# -eq 0; then - bzip2 -cdfq | $grep $opt "$pat" - exit $? -fi - -res=0 -for i do - if test -f "$i"; then :; else if test -f "$i.bz2"; then i="$i.bz2"; fi; fi - if test $list -eq 1; then - bzip2 -cdfq "$i" | $grep $opt "$pat" 2>&1 > /dev/null && echo $i - r=$? - elif test $# -eq 1 -o $silent -eq 1; then - bzip2 -cdfq "$i" | $grep $opt "$pat" - r=$? - else - bzip2 -cdfq "$i" | $grep $opt "$pat" | sed "s|^|${i}:|" - r=$? - fi - test "$r" -ne 0 && res="$r" -done -exit $res diff --git a/bzgrep.1 b/bzgrep.1 deleted file mode 100644 index 930af8c..0000000 --- a/bzgrep.1 +++ /dev/null @@ -1,56 +0,0 @@ -\"Shamelessly copied from zmore.1 by Philippe Troin -\"for Debian GNU/Linux -.TH BZGREP 1 -.SH NAME -bzgrep, bzfgrep, bzegrep \- search possibly bzip2 compressed files for a regular expression -.SH SYNOPSIS -.B bzgrep -[ grep_options ] -.BI [\ -e\ ] " pattern" -.IR filename ".\|.\|." -.br -.B bzegrep -[ egrep_options ] -.BI [\ -e\ ] " pattern" -.IR filename ".\|.\|." -.br -.B bzfgrep -[ fgrep_options ] -.BI [\ -e\ ] " pattern" -.IR filename ".\|.\|." 
-.SH DESCRIPTION -.IR Bzgrep -is used to invoke the -.I grep -on bzip2-compressed files. All options specified are passed directly to -.I grep. -If no file is specified, then the standard input is decompressed -if necessary and fed to grep. -Otherwise the given files are uncompressed if necessary and fed to -.I grep. -.PP -If -.I bzgrep -is invoked as -.I bzegrep -or -.I bzfgrep -then -.I egrep -or -.I fgrep -is used instead of -.I grep. -If the GREP environment variable is set, -.I bzgrep -uses it as the -.I grep -program to be invoked. For example: - - for sh: GREP=fgrep bzgrep string files - for csh: (setenv GREP fgrep; bzgrep string files) -.SH AUTHOR -Charles Levert (charles@comm.polymtl.ca). Adapted to bzip2 by Philippe -Troin for Debian GNU/Linux. -.SH "SEE ALSO" -grep(1), egrep(1), fgrep(1), bzdiff(1), bzmore(1), bzless(1), bzip2(1) diff --git a/bzip2.1 b/bzip2.1 deleted file mode 100644 index 623435c..0000000 --- a/bzip2.1 +++ /dev/null @@ -1,453 +0,0 @@ -.PU -.TH bzip2 1 -.SH NAME -bzip2, bunzip2 \- a block-sorting file compressor, v1.0.2 -.br -bzcat \- decompresses files to stdout -.br -bzip2recover \- recovers data from damaged bzip2 files - -.SH SYNOPSIS -.ll +8 -.B bzip2 -.RB [ " \-cdfkqstvzVL123456789 " ] -[ -.I "filenames \&..." -] -.ll -8 -.br -.B bunzip2 -.RB [ " \-fkvsVL " ] -[ -.I "filenames \&..." -] -.br -.B bzcat -.RB [ " \-s " ] -[ -.I "filenames \&..." -] -.br -.B bzip2recover -.I "filename" - -.SH DESCRIPTION -.I bzip2 -compresses files using the Burrows-Wheeler block sorting -text compression algorithm, and Huffman coding. Compression is -generally considerably better than that achieved by more conventional -LZ77/LZ78-based compressors, and approaches the performance of the PPM -family of statistical compressors. - -The command-line options are deliberately very similar to -those of -.I GNU gzip, -but they are not identical. - -.I bzip2 -expects a list of file names to accompany the -command-line flags. 
Each file is replaced by a compressed version of -itself, with the name "original_name.bz2". -Each compressed file -has the same modification date, permissions, and, when possible, -ownership as the corresponding original, so that these properties can -be correctly restored at decompression time. File name handling is -naive in the sense that there is no mechanism for preserving original -file names, permissions, ownerships or dates in filesystems which lack -these concepts, or have serious file name length restrictions, such as -MS-DOS. - -.I bzip2 -and -.I bunzip2 -will by default not overwrite existing -files. If you want this to happen, specify the \-f flag. - -If no file names are specified, -.I bzip2 -compresses from standard -input to standard output. In this case, -.I bzip2 -will decline to -write compressed output to a terminal, as this would be entirely -incomprehensible and therefore pointless. - -.I bunzip2 -(or -.I bzip2 \-d) -decompresses all -specified files. Files which were not created by -.I bzip2 -will be detected and ignored, and a warning issued. -.I bzip2 -attempts to guess the filename for the decompressed file -from that of the compressed file as follows: - - filename.bz2 becomes filename - filename.bz becomes filename - filename.tbz2 becomes filename.tar - filename.tbz becomes filename.tar - anyothername becomes anyothername.out - -If the file does not end in one of the recognised endings, -.I .bz2, -.I .bz, -.I .tbz2 -or -.I .tbz, -.I bzip2 -complains that it cannot -guess the name of the original file, and uses the original name -with -.I .out -appended. - -As with compression, supplying no -filenames causes decompression from -standard input to standard output. - -.I bunzip2 -will correctly decompress a file which is the -concatenation of two or more compressed files. The result is the -concatenation of the corresponding uncompressed files. Integrity -testing (\-t) -of concatenated -compressed files is also supported. 
- -You can also compress or decompress files to the standard output by -giving the \-c flag. Multiple files may be compressed and -decompressed like this. The resulting outputs are fed sequentially to -stdout. Compression of multiple files -in this manner generates a stream -containing multiple compressed file representations. Such a stream -can be decompressed correctly only by -.I bzip2 -version 0.9.0 or -later. Earlier versions of -.I bzip2 -will stop after decompressing -the first file in the stream. - -.I bzcat -(or -.I bzip2 -dc) -decompresses all specified files to -the standard output. - -.I bzip2 -will read arguments from the environment variables -.I BZIP2 -and -.I BZIP, -in that order, and will process them -before any arguments read from the command line. This gives a -convenient way to supply default arguments. - -Compression is always performed, even if the compressed -file is slightly -larger than the original. Files of less than about one hundred bytes -tend to get larger, since the compression mechanism has a constant -overhead in the region of 50 bytes. Random data (including the output -of most file compressors) is coded at about 8.05 bits per byte, giving -an expansion of around 0.5%. - -As a self-check for your protection, -.I -bzip2 -uses 32-bit CRCs to -make sure that the decompressed version of a file is identical to the -original. This guards against corruption of the compressed data, and -against undetected bugs in -.I bzip2 -(hopefully very unlikely). The -chances of data corruption going undetected is microscopic, about one -chance in four billion for each file processed. Be aware, though, that -the check occurs upon decompression, so it can only tell you that -something is wrong. It can't help you -recover the original uncompressed -data. You can use -.I bzip2recover -to try to recover data from -damaged files. 
- -Return values: 0 for a normal exit, 1 for environmental problems (file -not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt -compressed file, 3 for an internal consistency error (eg, bug) which -caused -.I bzip2 -to panic. - -.SH OPTIONS -.TP -.B \-c --stdout -Compress or decompress to standard output. -.TP -.B \-d --decompress -Force decompression. -.I bzip2, -.I bunzip2 -and -.I bzcat -are -really the same program, and the decision about what actions to take is -done on the basis of which name is used. This flag overrides that -mechanism, and forces -.I bzip2 -to decompress. -.TP -.B \-z --compress -The complement to \-d: forces compression, regardless of the -invocation name. -.TP -.B \-t --test -Check integrity of the specified file(s), but don't decompress them. -This really performs a trial decompression and throws away the result. -.TP -.B \-f --force -Force overwrite of output files. Normally, -.I bzip2 -will not overwrite -existing output files. Also forces -.I bzip2 -to break hard links -to files, which it otherwise wouldn't do. - -bzip2 normally declines to decompress files which don't have the -correct magic header bytes. If forced (-f), however, it will pass -such files through unmodified. This is how GNU gzip behaves. -.TP -.B \-k --keep -Keep (don't delete) input files during compression -or decompression. -.TP -.B \-s --small -Reduce memory usage, for compression, decompression and testing. Files -are decompressed and tested using a modified algorithm which only -requires 2.5 bytes per block byte. This means any file can be -decompressed in 2300k of memory, albeit at about half the normal speed. - -During compression, \-s selects a block size of 200k, which limits -memory use to around the same figure, at the expense of your compression -ratio. In short, if your machine is low on memory (8 megabytes or -less), use \-s for everything. See MEMORY MANAGEMENT below. -.TP -.B \-q --quiet -Suppress non-essential warning messages. 
Messages pertaining to -I/O errors and other critical events will not be suppressed. -.TP -.B \-v --verbose -Verbose mode -- show the compression ratio for each file processed. -Further \-v's increase the verbosity level, spewing out lots of -information which is primarily of interest for diagnostic purposes. -.TP -.B \-L --license -V --version -Display the software version, license terms and conditions. -.TP -.B \-1 (or \-\-fast) to \-9 (or \-\-best) -Set the block size to 100 k, 200 k .. 900 k when compressing. Has no -effect when decompressing. See MEMORY MANAGEMENT below. -The \-\-fast and \-\-best aliases are primarily for GNU gzip -compatibility. In particular, \-\-fast doesn't make things -significantly faster. -And \-\-best merely selects the default behaviour. -.TP -.B \-- -Treats all subsequent arguments as file names, even if they start -with a dash. This is so you can handle files with names beginning -with a dash, for example: bzip2 \-- \-myfilename. -.TP -.B \--repetitive-fast --repetitive-best -These flags are redundant in versions 0.9.5 and above. They provided -some coarse control over the behaviour of the sorting algorithm in -earlier versions, which was sometimes useful. 0.9.5 and above have an -improved algorithm which renders these flags irrelevant. - -.SH MEMORY MANAGEMENT -.I bzip2 -compresses large files in blocks. The block size affects -both the compression ratio achieved, and the amount of memory needed for -compression and decompression. The flags \-1 through \-9 -specify the block size to be 100,000 bytes through 900,000 bytes (the -default) respectively. At decompression time, the block size used for -compression is read from the header of the compressed file, and -.I bunzip2 -then allocates itself just enough memory to decompress -the file. Since block sizes are stored in compressed files, it follows -that the flags \-1 to \-9 are irrelevant to and so ignored -during decompression. 
- -Compression and decompression requirements, -in bytes, can be estimated as: - - Compression: 400k + ( 8 x block size ) - - Decompression: 100k + ( 4 x block size ), or - 100k + ( 2.5 x block size ) - -Larger block sizes give rapidly diminishing marginal returns. Most of -the compression comes from the first two or three hundred k of block -size, a fact worth bearing in mind when using -.I bzip2 -on small machines. -It is also important to appreciate that the decompression memory -requirement is set at compression time by the choice of block size. - -For files compressed with the default 900k block size, -.I bunzip2 -will require about 3700 kbytes to decompress. To support decompression -of any file on a 4 megabyte machine, -.I bunzip2 -has an option to -decompress using approximately half this amount of memory, about 2300 -kbytes. Decompression speed is also halved, so you should use this -option only where necessary. The relevant flag is -s. - -In general, try and use the largest block size memory constraints allow, -since that maximises the compression achieved. Compression and -decompression speed are virtually unaffected by block size. - -Another significant point applies to files which fit in a single block --- that means most files you'd encounter using a large block size. The -amount of real memory touched is proportional to the size of the file, -since the file is smaller than a block. For example, compressing a file -20,000 bytes long with the flag -9 will cause the compressor to -allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560 -kbytes of it. Similarly, the decompressor will allocate 3700k but only -touch 100k + 20000 * 4 = 180 kbytes. - -Here is a table which summarises the maximum memory usage for different -block sizes. Also recorded is the total compressed size for 14 files of -the Calgary Text Compression Corpus totalling 3,141,622 bytes. This -column gives some feel for how compression varies with block size. 
-These figures tend to understate the advantage of larger block sizes for -larger files, since the Corpus is dominated by smaller files. - - Compress Decompress Decompress Corpus - Flag usage usage -s usage Size - - -1 1200k 500k 350k 914704 - -2 2000k 900k 600k 877703 - -3 2800k 1300k 850k 860338 - -4 3600k 1700k 1100k 846899 - -5 4400k 2100k 1350k 845160 - -6 5200k 2500k 1600k 838626 - -7 6100k 2900k 1850k 834096 - -8 6800k 3300k 2100k 828642 - -9 7600k 3700k 2350k 828642 - -.SH RECOVERING DATA FROM DAMAGED FILES -.I bzip2 -compresses files in blocks, usually 900kbytes long. Each -block is handled independently. If a media or transmission error causes -a multi-block .bz2 -file to become damaged, it may be possible to -recover data from the undamaged blocks in the file. - -The compressed representation of each block is delimited by a 48-bit -pattern, which makes it possible to find the block boundaries with -reasonable certainty. Each block also carries its own 32-bit CRC, so -damaged blocks can be distinguished from undamaged ones. - -.I bzip2recover -is a simple program whose purpose is to search for -blocks in .bz2 files, and write each block out into its own .bz2 -file. You can then use -.I bzip2 -\-t -to test the -integrity of the resulting files, and decompress those which are -undamaged. - -.I bzip2recover -takes a single argument, the name of the damaged file, -and writes a number of files "rec00001file.bz2", -"rec00002file.bz2", etc, containing the extracted blocks. -The output filenames are designed so that the use of -wildcards in subsequent processing -- for example, -"bzip2 -dc rec*file.bz2 > recovered_data" -- processes the files in -the correct order. - -.I bzip2recover -should be of most use dealing with large .bz2 -files, as these will contain many blocks. It is clearly -futile to use it on damaged single-block files, since a -damaged block cannot be recovered. 
If you wish to minimise -any potential data loss through media or transmission errors, -you might consider compressing with a smaller -block size. - -.SH PERFORMANCE NOTES -The sorting phase of compression gathers together similar strings in the -file. Because of this, files containing very long runs of repeated -symbols, like "aabaabaabaab ..." (repeated several hundred times) may -compress more slowly than normal. Versions 0.9.5 and above fare much -better than previous versions in this respect. The ratio between -worst-case and average-case compression time is in the region of 10:1. -For previous versions, this figure was more like 100:1. You can use the -\-vvvv option to monitor progress in great detail, if you want. - -Decompression speed is unaffected by these phenomena. - -.I bzip2 -usually allocates several megabytes of memory to operate -in, and then charges all over it in a fairly random fashion. This means -that performance, both for compressing and decompressing, is largely -determined by the speed at which your machine can service cache misses. -Because of this, small changes to the code to reduce the miss rate have -been observed to give disproportionately large performance improvements. -I imagine -.I bzip2 -will perform best on machines with very large caches. - -.SH CAVEATS -I/O error messages are not as helpful as they could be. -.I bzip2 -tries hard to detect I/O errors and exit cleanly, but the details of -what the problem is sometimes seem rather misleading. - -This manual page pertains to version 1.0.2 of -.I bzip2. -Compressed data created by this version is entirely forwards and -backwards compatible with the previous public releases, versions -0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, but with the following -exception: 0.9.0 and above can correctly decompress multiple -concatenated compressed files. 0.1pl2 cannot do this; it will stop -after decompressing just the first file in the stream. 
- -.I bzip2recover -versions prior to this one, 1.0.2, used 32-bit integers to represent -bit positions in compressed files, so it could not handle compressed -files more than 512 megabytes long. Version 1.0.2 and above uses -64-bit ints on some platforms which support them (GNU supported -targets, and Windows). To establish whether or not bzip2recover was -built with such a limitation, run it without arguments. In any event -you can build yourself an unlimited version if you can recompile it -with MaybeUInt64 set to be an unsigned 64-bit integer. - - - -.SH AUTHOR -Julian Seward, jseward@acm.org. - -http://sources.redhat.com/bzip2 - -The ideas embodied in -.I bzip2 -are due to (at least) the following -people: Michael Burrows and David Wheeler (for the block sorting -transformation), David Wheeler (again, for the Huffman coder), Peter -Fenwick (for the structured coding model in the original -.I bzip, -and many refinements), and Alistair Moffat, Radford Neal and Ian Witten -(for the arithmetic coder in the original -.I bzip). -I am much -indebted for their help, support and advice. See the manual in the -source distribution for pointers to sources of documentation. Christian -von Roques encouraged me to look for faster sorting algorithms, so as to -speed up compression. Bela Lubkin encouraged me to improve the -worst-case compression performance. -The bz* scripts are derived from those of GNU gzip. -Many people sent patches, helped -with portability problems, lent machines, gave advice and were generally -helpful. 
diff --git a/bzip2.1.preformatted b/bzip2.1.preformatted deleted file mode 100644 index 0f20cb5..0000000 --- a/bzip2.1.preformatted +++ /dev/null @@ -1,398 +0,0 @@ -bzip2(1) bzip2(1) - - - -NNAAMMEE - bzip2, bunzip2 - a block-sorting file compressor, v1.0.2 - bzcat - decompresses files to stdout - bzip2recover - recovers data from damaged bzip2 files - - -SSYYNNOOPPSSIISS - bbzziipp22 [ --ccddffkkqqssttvvzzVVLL112233445566778899 ] [ _f_i_l_e_n_a_m_e_s _._._. ] - bbuunnzziipp22 [ --ffkkvvssVVLL ] [ _f_i_l_e_n_a_m_e_s _._._. ] - bbzzccaatt [ --ss ] [ _f_i_l_e_n_a_m_e_s _._._. ] - bbzziipp22rreeccoovveerr _f_i_l_e_n_a_m_e - - -DDEESSCCRRIIPPTTIIOONN - _b_z_i_p_2 compresses files using the Burrows-Wheeler block - sorting text compression algorithm, and Huffman coding. - Compression is generally considerably better than that - achieved by more conventional LZ77/LZ78-based compressors, - and approaches the performance of the PPM family of sta­ - tistical compressors. - - The command-line options are deliberately very similar to - those of _G_N_U _g_z_i_p_, but they are not identical. - - _b_z_i_p_2 expects a list of file names to accompany the com­ - mand-line flags. Each file is replaced by a compressed - version of itself, with the name "original_name.bz2". - Each compressed file has the same modification date, per­ - missions, and, when possible, ownership as the correspond­ - ing original, so that these properties can be correctly - restored at decompression time. File name handling is - naive in the sense that there is no mechanism for preserv­ - ing original file names, permissions, ownerships or dates - in filesystems which lack these concepts, or have serious - file name length restrictions, such as MS-DOS. - - _b_z_i_p_2 and _b_u_n_z_i_p_2 will by default not overwrite existing - files. If you want this to happen, specify the -f flag. - - If no file names are specified, _b_z_i_p_2 compresses from - standard input to standard output. 
In this case, _b_z_i_p_2 - will decline to write compressed output to a terminal, as - this would be entirely incomprehensible and therefore - pointless. - - _b_u_n_z_i_p_2 (or _b_z_i_p_2 _-_d_) decompresses all specified files. - Files which were not created by _b_z_i_p_2 will be detected and - ignored, and a warning issued. _b_z_i_p_2 attempts to guess - the filename for the decompressed file from that of the - compressed file as follows: - - filename.bz2 becomes filename - filename.bz becomes filename - filename.tbz2 becomes filename.tar - filename.tbz becomes filename.tar - anyothername becomes anyothername.out - - If the file does not end in one of the recognised endings, - _._b_z_2_, _._b_z_, _._t_b_z_2 or _._t_b_z_, _b_z_i_p_2 complains that it cannot - guess the name of the original file, and uses the original - name with _._o_u_t appended. - - As with compression, supplying no filenames causes decom­ - pression from standard input to standard output. - - _b_u_n_z_i_p_2 will correctly decompress a file which is the con­ - catenation of two or more compressed files. The result is - the concatenation of the corresponding uncompressed files. - Integrity testing (-t) of concatenated compressed files is - also supported. - - You can also compress or decompress files to the standard - output by giving the -c flag. Multiple files may be com­ - pressed and decompressed like this. The resulting outputs - are fed sequentially to stdout. Compression of multiple - files in this manner generates a stream containing multi­ - ple compressed file representations. Such a stream can be - decompressed correctly only by _b_z_i_p_2 version 0.9.0 or - later. Earlier versions of _b_z_i_p_2 will stop after decom­ - pressing the first file in the stream. - - _b_z_c_a_t (or _b_z_i_p_2 _-_d_c_) decompresses all specified files to - the standard output. 
- - _b_z_i_p_2 will read arguments from the environment variables - _B_Z_I_P_2 and _B_Z_I_P_, in that order, and will process them - before any arguments read from the command line. This - gives a convenient way to supply default arguments. - - Compression is always performed, even if the compressed - file is slightly larger than the original. Files of less - than about one hundred bytes tend to get larger, since the - compression mechanism has a constant overhead in the - region of 50 bytes. Random data (including the output of - most file compressors) is coded at about 8.05 bits per - byte, giving an expansion of around 0.5%. - - As a self-check for your protection, _b_z_i_p_2 uses 32-bit - CRCs to make sure that the decompressed version of a file - is identical to the original. This guards against corrup­ - tion of the compressed data, and against undetected bugs - in _b_z_i_p_2 (hopefully very unlikely). The chances of data - corruption going undetected is microscopic, about one - chance in four billion for each file processed. Be aware, - though, that the check occurs upon decompression, so it - can only tell you that something is wrong. It can't help - you recover the original uncompressed data. You can use - _b_z_i_p_2_r_e_c_o_v_e_r to try to recover data from damaged files. - - Return values: 0 for a normal exit, 1 for environmental - problems (file not found, invalid flags, I/O errors, &c), - 2 to indicate a corrupt compressed file, 3 for an internal - consistency error (eg, bug) which caused _b_z_i_p_2 to panic. - - -OOPPTTIIOONNSS - --cc ----ssttddoouutt - Compress or decompress to standard output. - - --dd ----ddeeccoommpprreessss - Force decompression. _b_z_i_p_2_, _b_u_n_z_i_p_2 and _b_z_c_a_t are - really the same program, and the decision about - what actions to take is done on the basis of which - name is used. This flag overrides that mechanism, - and forces _b_z_i_p_2 to decompress. 
- - --zz ----ccoommpprreessss - The complement to -d: forces compression, - regardless of the invocation name. - - --tt ----tteesstt - Check integrity of the specified file(s), but don't - decompress them. This really performs a trial - decompression and throws away the result. - - --ff ----ffoorrccee - Force overwrite of output files. Normally, _b_z_i_p_2 - will not overwrite existing output files. Also - forces _b_z_i_p_2 to break hard links to files, which it - otherwise wouldn't do. - - bzip2 normally declines to decompress files which - don't have the correct magic header bytes. If - forced (-f), however, it will pass such files - through unmodified. This is how GNU gzip behaves. - - --kk ----kkeeeepp - Keep (don't delete) input files during compression - or decompression. - - --ss ----ssmmaallll - Reduce memory usage, for compression, decompression - and testing. Files are decompressed and tested - using a modified algorithm which only requires 2.5 - bytes per block byte. This means any file can be - decompressed in 2300k of memory, albeit at about - half the normal speed. - - During compression, -s selects a block size of - 200k, which limits memory use to around the same - figure, at the expense of your compression ratio. - In short, if your machine is low on memory (8 - megabytes or less), use -s for everything. See - MEMORY MANAGEMENT below. - - --qq ----qquuiieett - Suppress non-essential warning messages. Messages - pertaining to I/O errors and other critical events - will not be suppressed. - - --vv ----vveerrbboossee - Verbose mode -- show the compression ratio for each - file processed. Further -v's increase the ver­ - bosity level, spewing out lots of information which - is primarily of interest for diagnostic purposes. - - --LL ----lliicceennssee --VV ----vveerrssiioonn - Display the software version, license terms and - conditions. - - --11 ((oorr ----ffaasstt)) ttoo --99 ((oorr ----bbeesstt)) - Set the block size to 100 k, 200 k .. 
900 k when - compressing. Has no effect when decompressing. - See MEMORY MANAGEMENT below. The --fast and --best - aliases are primarily for GNU gzip compatibility. - In particular, --fast doesn't make things signifi­ - cantly faster. And --best merely selects the - default behaviour. - - ---- Treats all subsequent arguments as file names, even - if they start with a dash. This is so you can han­ - dle files with names beginning with a dash, for - example: bzip2 -- -myfilename. - - ----rreeppeettiittiivvee--ffaasstt ----rreeppeettiittiivvee--bbeesstt - These flags are redundant in versions 0.9.5 and - above. They provided some coarse control over the - behaviour of the sorting algorithm in earlier ver­ - sions, which was sometimes useful. 0.9.5 and above - have an improved algorithm which renders these - flags irrelevant. - - -MMEEMMOORRYY MMAANNAAGGEEMMEENNTT - _b_z_i_p_2 compresses large files in blocks. The block size - affects both the compression ratio achieved, and the - amount of memory needed for compression and decompression. - The flags -1 through -9 specify the block size to be - 100,000 bytes through 900,000 bytes (the default) respec­ - tively. At decompression time, the block size used for - compression is read from the header of the compressed - file, and _b_u_n_z_i_p_2 then allocates itself just enough memory - to decompress the file. Since block sizes are stored in - compressed files, it follows that the flags -1 to -9 are - irrelevant to and so ignored during decompression. - - Compression and decompression requirements, in bytes, can - be estimated as: - - Compression: 400k + ( 8 x block size ) - - Decompression: 100k + ( 4 x block size ), or - 100k + ( 2.5 x block size ) - - Larger block sizes give rapidly diminishing marginal - returns. Most of the compression comes from the first two - or three hundred k of block size, a fact worth bearing in - mind when using _b_z_i_p_2 on small machines. 
It is also - important to appreciate that the decompression memory - requirement is set at compression time by the choice of - block size. - - For files compressed with the default 900k block size, - _b_u_n_z_i_p_2 will require about 3700 kbytes to decompress. To - support decompression of any file on a 4 megabyte machine, - _b_u_n_z_i_p_2 has an option to decompress using approximately - half this amount of memory, about 2300 kbytes. Decompres­ - sion speed is also halved, so you should use this option - only where necessary. The relevant flag is -s. - - In general, try and use the largest block size memory con­ - straints allow, since that maximises the compression - achieved. Compression and decompression speed are virtu­ - ally unaffected by block size. - - Another significant point applies to files which fit in a - single block -- that means most files you'd encounter - using a large block size. The amount of real memory - touched is proportional to the size of the file, since the - file is smaller than a block. For example, compressing a - file 20,000 bytes long with the flag -9 will cause the - compressor to allocate around 7600k of memory, but only - touch 400k + 20000 * 8 = 560 kbytes of it. Similarly, the - decompressor will allocate 3700k but only touch 100k + - 20000 * 4 = 180 kbytes. - - Here is a table which summarises the maximum memory usage - for different block sizes. Also recorded is the total - compressed size for 14 files of the Calgary Text Compres­ - sion Corpus totalling 3,141,622 bytes. This column gives - some feel for how compression varies with block size. - These figures tend to understate the advantage of larger - block sizes for larger files, since the Corpus is domi­ - nated by smaller files. 
- - Compress Decompress Decompress Corpus - Flag usage usage -s usage Size - - -1 1200k 500k 350k 914704 - -2 2000k 900k 600k 877703 - -3 2800k 1300k 850k 860338 - -4 3600k 1700k 1100k 846899 - -5 4400k 2100k 1350k 845160 - -6 5200k 2500k 1600k 838626 - -7 6100k 2900k 1850k 834096 - -8 6800k 3300k 2100k 828642 - -9 7600k 3700k 2350k 828642 - - -RREECCOOVVEERRIINNGG DDAATTAA FFRROOMM DDAAMMAAGGEEDD FFIILLEESS - _b_z_i_p_2 compresses files in blocks, usually 900kbytes long. - Each block is handled independently. If a media or trans­ - mission error causes a multi-block .bz2 file to become - damaged, it may be possible to recover data from the - undamaged blocks in the file. - - The compressed representation of each block is delimited - by a 48-bit pattern, which makes it possible to find the - block boundaries with reasonable certainty. Each block - also carries its own 32-bit CRC, so damaged blocks can be - distinguished from undamaged ones. - - _b_z_i_p_2_r_e_c_o_v_e_r is a simple program whose purpose is to - search for blocks in .bz2 files, and write each block out - into its own .bz2 file. You can then use _b_z_i_p_2 -t to test - the integrity of the resulting files, and decompress those - which are undamaged. - - _b_z_i_p_2_r_e_c_o_v_e_r takes a single argument, the name of the dam­ - aged file, and writes a number of files - "rec00001file.bz2", "rec00002file.bz2", etc, containing - the extracted blocks. The output filenames are - designed so that the use of wildcards in subsequent pro­ - cessing -- for example, "bzip2 -dc rec*file.bz2 > recov­ - ered_data" -- processes the files in the correct order. - - _b_z_i_p_2_r_e_c_o_v_e_r should be of most use dealing with large .bz2 - files, as these will contain many blocks. It is clearly - futile to use it on damaged single-block files, since a - damaged block cannot be recovered. 
If you wish to min­ - imise any potential data loss through media or transmis­ - sion errors, you might consider compressing with a smaller - block size. - - -PPEERRFFOORRMMAANNCCEE NNOOTTEESS - The sorting phase of compression gathers together similar - strings in the file. Because of this, files containing - very long runs of repeated symbols, like "aabaabaabaab - ..." (repeated several hundred times) may compress more - slowly than normal. Versions 0.9.5 and above fare much - better than previous versions in this respect. The ratio - between worst-case and average-case compression time is in - the region of 10:1. For previous versions, this figure - was more like 100:1. You can use the -vvvv option to mon­ - itor progress in great detail, if you want. - - Decompression speed is unaffected by these phenomena. - - _b_z_i_p_2 usually allocates several megabytes of memory to - operate in, and then charges all over it in a fairly ran­ - dom fashion. This means that performance, both for com­ - pressing and decompressing, is largely determined by the - speed at which your machine can service cache misses. - Because of this, small changes to the code to reduce the - miss rate have been observed to give disproportionately - large performance improvements. I imagine _b_z_i_p_2 will per­ - form best on machines with very large caches. - - -CCAAVVEEAATTSS - I/O error messages are not as helpful as they could be. - _b_z_i_p_2 tries hard to detect I/O errors and exit cleanly, - but the details of what the problem is sometimes seem - rather misleading. - - This manual page pertains to version 1.0.2 of _b_z_i_p_2_. Com­ - pressed data created by this version is entirely forwards - and backwards compatible with the previous public - releases, versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, - but with the following exception: 0.9.0 and above can cor­ - rectly decompress multiple concatenated compressed files. 
- 0.1pl2 cannot do this; it will stop after decompressing - just the first file in the stream. - - _b_z_i_p_2_r_e_c_o_v_e_r versions prior to this one, 1.0.2, used - 32-bit integers to represent bit positions in compressed - files, so it could not handle compressed files more than - 512 megabytes long. Version 1.0.2 and above uses 64-bit - ints on some platforms which support them (GNU supported - targets, and Windows). To establish whether or not - bzip2recover was built with such a limitation, run it - without arguments. In any event you can build yourself an - unlimited version if you can recompile it with MaybeUInt64 - set to be an unsigned 64-bit integer. - - - - -AAUUTTHHOORR - Julian Seward, jseward@acm.org. - - http://sources.redhat.com/bzip2 - - The ideas embodied in _b_z_i_p_2 are due to (at least) the fol­ - lowing people: Michael Burrows and David Wheeler (for the - block sorting transformation), David Wheeler (again, for - the Huffman coder), Peter Fenwick (for the structured cod­ - ing model in the original _b_z_i_p_, and many refinements), and - Alistair Moffat, Radford Neal and Ian Witten (for the - arithmetic coder in the original _b_z_i_p_)_. I am much - indebted for their help, support and advice. See the man­ - ual in the source distribution for pointers to sources of - documentation. Christian von Roques encouraged me to look - for faster sorting algorithms, so as to speed up compres­ - sion. Bela Lubkin encouraged me to improve the worst-case - compression performance. The bz* scripts are derived from - those of GNU gzip. Many people sent patches, helped with - portability problems, lent machines, gave advice and were - generally helpful. 
- - - - bzip2(1) diff --git a/bzip2.c b/bzip2.c index 807f420..5c1c609 100644 --- a/bzip2.c +++ b/bzip2.c @@ -3,6 +3,11 @@ /*--- A block-sorting, lossless compressor bzip2.c ---*/ /*-----------------------------------------------------------*/ +/*-- Modified for use under GNO by Stephen Heumann --*/ +#ifdef __ORCAC__ +segment "bzip2"; +#endif + /*-- This file is a part of bzip2 and/or libbzip2, a program and library for lossless, block-sorting data compression. @@ -148,6 +153,14 @@ #include #include "bzlib.h" +#ifdef __appleiigs__ +#include +char *stristr(const char *, const char *); +#if defined(__GNO__) && defined(__STACK_CHECK__) +#include +#endif +#endif + #define ERROR_IF_EOF(i) { if ((i) == EOF) ioError(); } #define ERROR_IF_NOT_ZERO(i) { if ((i) != 0) ioError(); } #define ERROR_IF_MINUS_ONE(i) { if ((i) == (-1)) ioError(); } @@ -213,6 +226,11 @@ ERROR_IF_MINUS_ONE ( retVal ); \ } while ( 0 ) # endif + +# ifdef __GNO__ +# undef SET_BINARY_MODE +# define SET_BINARY_MODE(fd) fsetbinary(fd); +# endif #endif /* BZ_UNIX */ @@ -220,7 +238,10 @@ #if BZ_LCCWIN32 # include # include -# include +/* This was "#include ", but ORCA/C complains + about an invalid character, so I changed it. This + might possibly break compilation on Win 32 systems. 
*/ +# include # define NORETURN /**/ # define PATH_SEP '\\' @@ -253,8 +274,15 @@ typedef char Char; typedef unsigned char Bool; typedef unsigned char UChar; +#ifdef __ORCAC__ +typedef long Int32; +typedef unsigned long UInt32; +# define Int32_FMT "%ld" +#else typedef int Int32; typedef unsigned int UInt32; +# define Int32_FMT "%d" +#endif /* defined __ORCAC__ */ typedef short Int16; typedef unsigned short UInt16; @@ -386,7 +414,11 @@ static void uInt64_toAscii ( char* outbuf, UInt64* n ) { Int32 i, q; +#ifdef __ORCAC__ + static UChar buf[32]; +#else UChar buf[32]; +#endif Int32 nBuf = 0; UInt64 n_copy = *n; do { @@ -416,15 +448,24 @@ Bool myfeof ( FILE* f ) /*---------------------------------------------*/ +#ifndef __ORCAC__ static void compressStream ( FILE *stream, FILE *zStream ) { BZFILE* bzf = NULL; +#ifdef __ORCAC__ + static UChar ibuf[5000]; +#else UChar ibuf[5000]; +#endif Int32 nIbuf; UInt32 nbytes_in_lo32, nbytes_in_hi32; UInt32 nbytes_out_lo32, nbytes_out_hi32; +#ifdef __ORCAC__ + Int16 bzerr, bzerr_dummy, ret; +#else Int32 bzerr, bzerr_dummy, ret; +#endif SET_BINARY_MODE(stream); SET_BINARY_MODE(zStream); @@ -513,6 +554,7 @@ void compressStream ( FILE *stream, FILE *zStream ) panic ( "compress:end" ); /*notreached*/ } +#endif @@ -521,10 +563,18 @@ static Bool uncompressStream ( FILE *zStream, FILE *stream ) { BZFILE* bzf = NULL; +#ifdef __ORCAC__ + Int16 bzerr, bzerr_dummy; + Int32 ret, nread, streamNo, i; + static UChar obuf[5000]; + static UChar unused[BZ_MAX_UNUSED]; + Int16 nUnused; +#else Int32 bzerr, bzerr_dummy, ret, nread, streamNo, i; UChar obuf[5000]; UChar unused[BZ_MAX_UNUSED]; Int32 nUnused; +#endif UChar* unusedTmp; nUnused = 0; @@ -635,10 +685,18 @@ static Bool testStream ( FILE *zStream ) { BZFILE* bzf = NULL; +#ifdef __ORCAC__ + Int16 bzerr, bzerr_dummy, ret; + Int32 nread, streamNo, i; + static UChar obuf[5000]; + static UChar unused[BZ_MAX_UNUSED]; + Int16 nUnused; +#else Int32 bzerr, bzerr_dummy, ret, nread, streamNo, i; UChar 
obuf[5000]; UChar unused[BZ_MAX_UNUSED]; Int32 nUnused; +#endif UChar* unusedTmp; nUnused = 0; @@ -802,7 +860,11 @@ void cleanUpAndFail ( Int32 ec ) "%s: `%s' may be incomplete.\n", progName, outName ); fprintf ( stderr, +#ifndef __GNO__ "%s: I suggest doing an integrity test (bzip2 -tv)" +#else + "%s: I suggest doing an integrity test (bunzip2 -tv)" +#endif " of it.\n", progName ); } @@ -811,7 +873,7 @@ void cleanUpAndFail ( Int32 ec ) if (noisy && numFileNames > 0 && numFilesProcessed < numFileNames) { fprintf ( stderr, "%s: WARNING: some files have not been processed:\n" - "%s: %d specified on command line, %d not processed yet.\n\n", + "%s: " Int32_FMT " specified on command line, " Int32_FMT " not processed yet.\n\n", progName, progName, numFileNames, numFileNames - numFilesProcessed ); } @@ -827,8 +889,16 @@ void panic ( Char* s ) fprintf ( stderr, "\n%s: PANIC -- internal consistency error:\n" "\t%s\n" +#ifndef __GNO__ "\tThis is a BUG. Please report it to me at:\n" "\tjseward@acm.org\n", +#else + "\tThis is a BUG. If you are experiencing it only in\n" + "the GNO version of bunzip2, please report it to me at\n" + "sheumann@myrealbox.com . If you can duplicate it in\n" + "other versions of bzip2 as well, please report it to\n" + "the original author Julian Seward at jseward@acm.org\n", +#endif progName, s ); showFileNames(); cleanUpAndFail( 3 ); @@ -880,6 +950,10 @@ void ioError ( void ) /*---------------------------------------------*/ +#ifdef __ORCAC__ +#pragma databank 1 +#endif + static void mySignalCatcher ( IntNative n ) { @@ -889,11 +963,14 @@ void mySignalCatcher ( IntNative n ) cleanUpAndFail(1); } - +/* This function should never be called on a normal GNO system, + but it doesn't hurt to leave it in. 
*/ /*---------------------------------------------*/ static void mySIGSEGVorSIGBUScatcher ( IntNative n ) { +#ifndef __ORCAC__ +/* Not needed for decompression */ if (opMode == OM_Z) fprintf ( stderr, @@ -915,7 +992,10 @@ void mySIGSEGVorSIGBUScatcher ( IntNative n ) " have the manual or can't be bothered to read it, mail me anyway.\n" "\n", progName ); - else + else +#else + if (opMode != OM_Z) +#endif fprintf ( stderr, "\n%s: Caught a SIGSEGV or SIGBUS whilst decompressing.\n" @@ -945,6 +1025,10 @@ void mySIGSEGVorSIGBUScatcher ( IntNative n ) { cadvise(); cleanUpAndFail( 2 ); } } +#ifdef __ORCAC__ +#pragma databank 0 +#endif + /*---------------------------------------------*/ static @@ -1000,10 +1084,10 @@ void copyFileName ( Char* to, Char* from ) if ( strlen(from) > FILE_NAME_LEN-10 ) { fprintf ( stderr, - "bzip2: file name\n`%s'\n" + "%s: file name\n`%s'\n" "is suspiciously (more than %d chars) long.\n" "Try using a reasonable file name instead. Sorry! :-)\n", - from, FILE_NAME_LEN-10 + progName, from, FILE_NAME_LEN-10 ); setExit(1); exit(exitValue); @@ -1137,13 +1221,43 @@ void applySavedMetaInfoToOutputFile ( Char *dstName ) retVal = chmod ( dstName, fileMetaInfo.st_mode ); ERROR_IF_NOT_ZERO ( retVal ); +#ifndef __ORCAC__ + /* ORCA/C's localtime(), which is called by utime(), is broken. + * We fix this by simply disabling time setting, as bzip2 does + * on non-Unix platforms anyway. A better solution would be + * to fix or replace utime() and/or localtime(). 
+ */ retVal = utime ( dstName, &uTimBuf ); ERROR_IF_NOT_ZERO ( retVal ); +#endif +#ifdef __appleiigs__ + /* Set filetype to BIN if running on the GS */ + { + static GSString255 fileNameStringGS; + static FileInfoRecGS infoRec = { 4, /* pCount */ + &fileNameStringGS, /* Ptr to file name */ + 0x00C3, /* access restrictions (none) */ + 0x06, /* filetype (BIN) */ + 0x0000 /* auxtype ($0000) */ + }; + + if (strlen( dstName ) <= 255) { + strncpy( fileNameStringGS.text, dstName, 255 ); + fileNameStringGS.length = strlen( dstName ); + SetFileInfo( &infoRec ); + /* Ignore any errors produced by this call, leaving the file's + existing filetype intact. This parallels the approach taken + when setting file attributes on Unix. + */ + } + } +#else retVal = chown ( dstName, fileMetaInfo.st_uid, fileMetaInfo.st_gid ); /* chown() will in many cases return with EPERM, which can be safely ignored. */ +#endif /* defined __appleiigs__ */ # endif } @@ -1181,8 +1295,14 @@ Bool hasSuffix ( Char* s, Char* suffix ) { Int32 ns = strlen(s); Int32 nx = strlen(suffix); - if (ns < nx) return False; + if (ns < nx) return False; +#ifndef __appleiigs__ if (strcmp(s + ns - nx, suffix) == 0) return True; +#else + /* Filenames are case-insensitive on the GS, + so use a case-insensitive compare for them */ + if (strcasecmp(s + ns - nx, suffix) == 0) return True; +#endif return False; } @@ -1198,6 +1318,7 @@ Bool mapSuffix ( Char* name, /*---------------------------------------------*/ +#ifndef __ORCAC__ static void compress ( Char *name ) { @@ -1279,7 +1400,7 @@ void compress ( Char *name ) } if ( srcMode == SM_F2F && !forceOverwrite && (n=countHardLinks ( inName )) > 0) { - fprintf ( stderr, "%s: Input file %s has %d other link%s.\n", + fprintf ( stderr, "%s: Input file %s has " Int32_FMT " other link%s.\n", progName, inName, n, n > 1 ? 
"s" : "" ); setExit(1); return; @@ -1376,6 +1497,7 @@ void compress ( Char *name ) deleteOutputOnInterrupt = False; } +#endif /*---------------------------------------------*/ @@ -1465,7 +1587,7 @@ void uncompress ( Char *name ) } if ( srcMode == SM_F2F && !forceOverwrite && (n=countHardLinks ( inName ) ) > 0) { - fprintf ( stderr, "%s: Input file %s has %d other link%s.\n", + fprintf ( stderr, "%s: Input file %s has " Int32_FMT " other link%s.\n", progName, inName, n, n > 1 ? "s" : "" ); setExit(1); return; @@ -1671,7 +1793,11 @@ void license ( void ) { fprintf ( stderr, +#ifndef __ORCAC__ "bzip2, a block-sorting file compressor. " +#else + "bunzip2, a block-sorting file decompressor. " +#endif "Version %s.\n" " \n" " Copyright (C) 1996-2002 by Julian Seward.\n" @@ -1684,6 +1810,14 @@ void license ( void ) " but WITHOUT ANY WARRANTY; without even the implied warranty of\n" " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" " LICENSE file for more details.\n" +#ifdef __ORCAC__ + " \n" + " This version of bunzip2 for GNO is based on Julian Seward's bzip2\n" + " program for other platforms, with modifications by Stephen Heumann.\n" + " \n" + " This program contains material from the ORCA/C Run-Time Libraries,\n" + " copyright 1987-1996 by Byte Works, Inc. Used with permission.\n" +#endif " \n", BZ2_bzlibVersion() ); @@ -1696,13 +1830,19 @@ void usage ( Char *fullProgName ) { fprintf ( stderr, +#ifndef __ORCAC__ "bzip2, a block-sorting file compressor. " +#else + "bunzip2, a block-sorting file decompressor. 
" +#endif "Version %s.\n" "\n usage: %s [flags and input files in any order]\n" "\n" " -h --help print this message\n" " -d --decompress force decompression\n" +#ifndef __ORCAC__ " -z --compress force compression\n" +#endif " -k --keep keep (don't delete) input files\n" " -f --force overwrite existing output files\n" " -t --test test compressed file integrity\n" @@ -1712,17 +1852,33 @@ void usage ( Char *fullProgName ) " -L --license display software version & license\n" " -V --version display software version & license\n" " -s --small use less memory (at most 2500k)\n" +#ifndef __ORCAC__ " -1 .. -9 set block size to 100k .. 900k\n" " --fast alias for -1\n" " --best alias for -9\n" +#endif "\n" +#ifndef __ORCAC__ " If invoked as `bzip2', default action is to compress.\n" " as `bunzip2', default action is to decompress.\n" +#else + " If invoked as 'bunzip2', default action is to decompress.\n" +#endif " as `bzcat', default action is to decompress to stdout.\n" "\n" +#ifndef __ORCAC__ " If no file names are given, bzip2 compresses or decompresses\n" " from standard input to standard output. You can combine\n" " short flags, so `-v -4' means the same as -v4 or -4v, &c.\n" +#else + " If no file names are given, bunzip2 decompresses from standard\n" + " input to standard output. 
You can combine short flags, so\n" + " `-v -4' means the same as -v4 or -4v, &c.\n" + "\n" + " This version of bunzip2 for GNO is based on the bzip2 program for\n" + " other platforms; however, it has all compression functionality\n" + " disabled and will only decompress or test compressed files.\n" +#endif # if BZ_UNIX "\n" # endif @@ -1794,6 +1950,10 @@ Cell *mkCell ( void ) /*---------------------------------------------*/ +#ifdef __ORCAC__ +#pragma optimize 119 +#endif + static Cell *snocString ( Cell *root, Char *name ) { @@ -1810,6 +1970,10 @@ Cell *snocString ( Cell *root, Char *name ) } } +#ifdef __ORCAC__ +#pragma optimize -1 +#endif + /*---------------------------------------------*/ static @@ -1850,6 +2014,11 @@ IntNative main ( IntNative argc, Char *argv[] ) Cell *aa; Bool decode; +#if defined(__GNO__) && defined(__STACK_CHECK__) + __REPORT_STACK(); + fprintf(stderr, "Stack checking on\n"); +#endif + /*-- Be really really really paranoid :-) --*/ if (sizeof(Int32) != 4 || sizeof(UInt32) != 4 || sizeof(Int16) != 2 || sizeof(UInt16) != 2 || @@ -1920,6 +2089,7 @@ IntNative main ( IntNative argc, Char *argv[] ) /*-- Determine what to do (compress/uncompress/test/cat). --*/ /*-- Note that subsequent flag handling may change this. --*/ +#ifndef __ORCAC__ opMode = OM_Z; if ( (strstr ( progName, "unzip" ) != 0) || @@ -1934,6 +2104,23 @@ IntNative main ( IntNative argc, Char *argv[] ) srcMode = (numFileNames == 0) ? SM_I2O : SM_F2O; } +#else + /* GNO modifications: Decompress by default, and use case-insensitive + compares for filenames, in keeping with the normal practice on the GS */ + opMode = OM_UNZ; + + if (stristr ( progName, "bzip" ) != 0) + opMode = OM_Z; + + if (stristr ( progName, "unzip" ) != 0) + opMode = OM_UNZ; + + if ( (stristr ( progName, "z2cat" ) != 0) || + (stristr ( progName, "zcat" ) != 0) ) { + opMode = OM_UNZ; + srcMode = (numFileNames == 0) ? SM_I2O : SM_F2O; + } +#endif /*-- Look at the flags. 
--*/ for (aa = argList; aa != NULL; aa = aa->link) { @@ -2026,6 +2213,7 @@ IntNative main ( IntNative argc, Char *argv[] ) } if (opMode == OM_Z) { +#ifndef __ORCAC__ if (srcMode == SM_I2O) { compress ( NULL ); } else { @@ -2037,6 +2225,13 @@ IntNative main ( IntNative argc, Char *argv[] ) compress ( aa->name ); } } +#else + fprintf ( stderr, + "%s: Cannot compress data. The GNO version of bunzip2 does\n" + "%s: not support compression, only decompression and testing.\n", + progName, progName ); + cleanUpAndFail( 4 ); +#endif } else diff --git a/bzip2.txt b/bzip2.txt deleted file mode 100644 index 6afe358..0000000 --- a/bzip2.txt +++ /dev/null @@ -1,390 +0,0 @@ - -NAME - bzip2, bunzip2 - a block-sorting file compressor, v1.0.2 - bzcat - decompresses files to stdout - bzip2recover - recovers data from damaged bzip2 files - - -SYNOPSIS - bzip2 [ -cdfkqstvzVL123456789 ] [ filenames ... ] - bunzip2 [ -fkvsVL ] [ filenames ... ] - bzcat [ -s ] [ filenames ... ] - bzip2recover filename - - -DESCRIPTION - bzip2 compresses files using the Burrows-Wheeler block - sorting text compression algorithm, and Huffman coding. - Compression is generally considerably better than that - achieved by more conventional LZ77/LZ78-based compressors, - and approaches the performance of the PPM family of sta­ - tistical compressors. - - The command-line options are deliberately very similar to - those of GNU gzip, but they are not identical. - - bzip2 expects a list of file names to accompany the com­ - mand-line flags. Each file is replaced by a compressed - version of itself, with the name "original_name.bz2". - Each compressed file has the same modification date, per­ - missions, and, when possible, ownership as the correspond­ - ing original, so that these properties can be correctly - restored at decompression time. 
File name handling is - naive in the sense that there is no mechanism for preserv­ - ing original file names, permissions, ownerships or dates - in filesystems which lack these concepts, or have serious - file name length restrictions, such as MS-DOS. - - bzip2 and bunzip2 will by default not overwrite existing - files. If you want this to happen, specify the -f flag. - - If no file names are specified, bzip2 compresses from - standard input to standard output. In this case, bzip2 - will decline to write compressed output to a terminal, as - this would be entirely incomprehensible and therefore - pointless. - - bunzip2 (or bzip2 -d) decompresses all specified files. - Files which were not created by bzip2 will be detected and - ignored, and a warning issued. bzip2 attempts to guess - the filename for the decompressed file from that of the - compressed file as follows: - - filename.bz2 becomes filename - filename.bz becomes filename - filename.tbz2 becomes filename.tar - filename.tbz becomes filename.tar - anyothername becomes anyothername.out - - If the file does not end in one of the recognised endings, - .bz2, .bz, .tbz2 or .tbz, bzip2 complains that it cannot - guess the name of the original file, and uses the original - name with .out appended. - - As with compression, supplying no filenames causes decom­ - pression from standard input to standard output. - - bunzip2 will correctly decompress a file which is the con­ - catenation of two or more compressed files. The result is - the concatenation of the corresponding uncompressed files. - Integrity testing (-t) of concatenated compressed files is - also supported. - - You can also compress or decompress files to the standard - output by giving the -c flag. Multiple files may be com­ - pressed and decompressed like this. The resulting outputs - are fed sequentially to stdout. Compression of multiple - files in this manner generates a stream containing multi­ - ple compressed file representations. 
Such a stream can be - decompressed correctly only by bzip2 version 0.9.0 or - later. Earlier versions of bzip2 will stop after decom­ - pressing the first file in the stream. - - bzcat (or bzip2 -dc) decompresses all specified files to - the standard output. - - bzip2 will read arguments from the environment variables - BZIP2 and BZIP, in that order, and will process them - before any arguments read from the command line. This - gives a convenient way to supply default arguments. - - Compression is always performed, even if the compressed - file is slightly larger than the original. Files of less - than about one hundred bytes tend to get larger, since the - compression mechanism has a constant overhead in the - region of 50 bytes. Random data (including the output of - most file compressors) is coded at about 8.05 bits per - byte, giving an expansion of around 0.5%. - - As a self-check for your protection, bzip2 uses 32-bit - CRCs to make sure that the decompressed version of a file - is identical to the original. This guards against corrup­ - tion of the compressed data, and against undetected bugs - in bzip2 (hopefully very unlikely). The chances of data - corruption going undetected is microscopic, about one - chance in four billion for each file processed. Be aware, - though, that the check occurs upon decompression, so it - can only tell you that something is wrong. It can't help - you recover the original uncompressed data. You can use - bzip2recover to try to recover data from damaged files. - - Return values: 0 for a normal exit, 1 for environmental - problems (file not found, invalid flags, I/O errors, &c), - 2 to indicate a corrupt compressed file, 3 for an internal - consistency error (eg, bug) which caused bzip2 to panic. - - -OPTIONS - -c --stdout - Compress or decompress to standard output. - - -d --decompress - Force decompression. 
bzip2, bunzip2 and bzcat are - really the same program, and the decision about - what actions to take is done on the basis of which - name is used. This flag overrides that mechanism, - and forces bzip2 to decompress. - - -z --compress - The complement to -d: forces compression, - regardless of the invocation name. - - -t --test - Check integrity of the specified file(s), but don't - decompress them. This really performs a trial - decompression and throws away the result. - - -f --force - Force overwrite of output files. Normally, bzip2 - will not overwrite existing output files. Also - forces bzip2 to break hard links to files, which it - otherwise wouldn't do. - - bzip2 normally declines to decompress files which - don't have the correct magic header bytes. If - forced (-f), however, it will pass such files - through unmodified. This is how GNU gzip behaves. - - -k --keep - Keep (don't delete) input files during compression - or decompression. - - -s --small - Reduce memory usage, for compression, decompression - and testing. Files are decompressed and tested - using a modified algorithm which only requires 2.5 - bytes per block byte. This means any file can be - decompressed in 2300k of memory, albeit at about - half the normal speed. - - During compression, -s selects a block size of - 200k, which limits memory use to around the same - figure, at the expense of your compression ratio. - In short, if your machine is low on memory (8 - megabytes or less), use -s for everything. See - MEMORY MANAGEMENT below. - - -q --quiet - Suppress non-essential warning messages. Messages - pertaining to I/O errors and other critical events - will not be suppressed. - - -v --verbose - Verbose mode -- show the compression ratio for each - file processed. Further -v's increase the ver­ - bosity level, spewing out lots of information which - is primarily of interest for diagnostic purposes. 
- - -L --license -V --version - Display the software version, license terms and - conditions. - - -1 (or --fast) to -9 (or --best) - Set the block size to 100 k, 200 k .. 900 k when - compressing. Has no effect when decompressing. - See MEMORY MANAGEMENT below. The --fast and --best - aliases are primarily for GNU gzip compatibility. - In particular, --fast doesn't make things signifi­ - cantly faster. And --best merely selects the - default behaviour. - - -- Treats all subsequent arguments as file names, even - if they start with a dash. This is so you can han­ - dle files with names beginning with a dash, for - example: bzip2 -- -myfilename. - - --repetitive-fast --repetitive-best - These flags are redundant in versions 0.9.5 and - above. They provided some coarse control over the - behaviour of the sorting algorithm in earlier ver­ - sions, which was sometimes useful. 0.9.5 and above - have an improved algorithm which renders these - flags irrelevant. - - -MEMORY MANAGEMENT - bzip2 compresses large files in blocks. The block size - affects both the compression ratio achieved, and the - amount of memory needed for compression and decompression. - The flags -1 through -9 specify the block size to be - 100,000 bytes through 900,000 bytes (the default) respec­ - tively. At decompression time, the block size used for - compression is read from the header of the compressed - file, and bunzip2 then allocates itself just enough memory - to decompress the file. Since block sizes are stored in - compressed files, it follows that the flags -1 to -9 are - irrelevant to and so ignored during decompression. - - Compression and decompression requirements, in bytes, can - be estimated as: - - Compression: 400k + ( 8 x block size ) - - Decompression: 100k + ( 4 x block size ), or - 100k + ( 2.5 x block size ) - - Larger block sizes give rapidly diminishing marginal - returns. 
Most of the compression comes from the first two - or three hundred k of block size, a fact worth bearing in - mind when using bzip2 on small machines. It is also - important to appreciate that the decompression memory - requirement is set at compression time by the choice of - block size. - - For files compressed with the default 900k block size, - bunzip2 will require about 3700 kbytes to decompress. To - support decompression of any file on a 4 megabyte machine, - bunzip2 has an option to decompress using approximately - half this amount of memory, about 2300 kbytes. Decompres­ - sion speed is also halved, so you should use this option - only where necessary. The relevant flag is -s. - - In general, try and use the largest block size memory con­ - straints allow, since that maximises the compression - achieved. Compression and decompression speed are virtu­ - ally unaffected by block size. - - Another significant point applies to files which fit in a - single block -- that means most files you'd encounter - using a large block size. The amount of real memory - touched is proportional to the size of the file, since the - file is smaller than a block. For example, compressing a - file 20,000 bytes long with the flag -9 will cause the - compressor to allocate around 7600k of memory, but only - touch 400k + 20000 * 8 = 560 kbytes of it. Similarly, the - decompressor will allocate 3700k but only touch 100k + - 20000 * 4 = 180 kbytes. - - Here is a table which summarises the maximum memory usage - for different block sizes. Also recorded is the total - compressed size for 14 files of the Calgary Text Compres­ - sion Corpus totalling 3,141,622 bytes. This column gives - some feel for how compression varies with block size. - These figures tend to understate the advantage of larger - block sizes for larger files, since the Corpus is domi­ - nated by smaller files. 
- - Compress Decompress Decompress Corpus - Flag usage usage -s usage Size - - -1 1200k 500k 350k 914704 - -2 2000k 900k 600k 877703 - -3 2800k 1300k 850k 860338 - -4 3600k 1700k 1100k 846899 - -5 4400k 2100k 1350k 845160 - -6 5200k 2500k 1600k 838626 - -7 6100k 2900k 1850k 834096 - -8 6800k 3300k 2100k 828642 - -9 7600k 3700k 2350k 828642 - - -RECOVERING DATA FROM DAMAGED FILES - bzip2 compresses files in blocks, usually 900kbytes long. - Each block is handled independently. If a media or trans­ - mission error causes a multi-block .bz2 file to become - damaged, it may be possible to recover data from the - undamaged blocks in the file. - - The compressed representation of each block is delimited - by a 48-bit pattern, which makes it possible to find the - block boundaries with reasonable certainty. Each block - also carries its own 32-bit CRC, so damaged blocks can be - distinguished from undamaged ones. - - bzip2recover is a simple program whose purpose is to - search for blocks in .bz2 files, and write each block out - into its own .bz2 file. You can then use bzip2 -t to test - the integrity of the resulting files, and decompress those - which are undamaged. - - bzip2recover takes a single argument, the name of the dam­ - aged file, and writes a number of files - "rec00001file.bz2", "rec00002file.bz2", etc, containing - the extracted blocks. The output filenames are - designed so that the use of wildcards in subsequent pro­ - cessing -- for example, "bzip2 -dc rec*file.bz2 > recov­ - ered_data" -- processes the files in the correct order. - - bzip2recover should be of most use dealing with large .bz2 - files, as these will contain many blocks. It is clearly - futile to use it on damaged single-block files, since a - damaged block cannot be recovered. If you wish to min­ - imise any potential data loss through media or transmis­ - sion errors, you might consider compressing with a smaller - block size. 
- - -PERFORMANCE NOTES - The sorting phase of compression gathers together similar - strings in the file. Because of this, files containing - very long runs of repeated symbols, like "aabaabaabaab - ..." (repeated several hundred times) may compress more - slowly than normal. Versions 0.9.5 and above fare much - better than previous versions in this respect. The ratio - between worst-case and average-case compression time is in - the region of 10:1. For previous versions, this figure - was more like 100:1. You can use the -vvvv option to mon­ - itor progress in great detail, if you want. - - Decompression speed is unaffected by these phenomena. - - bzip2 usually allocates several megabytes of memory to - operate in, and then charges all over it in a fairly ran­ - dom fashion. This means that performance, both for com­ - pressing and decompressing, is largely determined by the - speed at which your machine can service cache misses. - Because of this, small changes to the code to reduce the - miss rate have been observed to give disproportionately - large performance improvements. I imagine bzip2 will per­ - form best on machines with very large caches. - - -CAVEATS - I/O error messages are not as helpful as they could be. - bzip2 tries hard to detect I/O errors and exit cleanly, - but the details of what the problem is sometimes seem - rather misleading. - - This manual page pertains to version 1.0.2 of bzip2. Com­ - pressed data created by this version is entirely forwards - and backwards compatible with the previous public - releases, versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, - but with the following exception: 0.9.0 and above can cor­ - rectly decompress multiple concatenated compressed files. - 0.1pl2 cannot do this; it will stop after decompressing - just the first file in the stream. 
- - bzip2recover versions prior to this one, 1.0.2, used - 32-bit integers to represent bit positions in compressed - files, so it could not handle compressed files more than - 512 megabytes long. Version 1.0.2 and above uses 64-bit - ints on some platforms which support them (GNU supported - targets, and Windows). To establish whether or not - bzip2recover was built with such a limitation, run it - without arguments. In any event you can build yourself an - unlimited version if you can recompile it with MaybeUInt64 - set to be an unsigned 64-bit integer. - - -AUTHOR - Julian Seward, jseward@acm.org. - - http://sources.redhat.com/bzip2 - - The ideas embodied in bzip2 are due to (at least) the fol­ - lowing people: Michael Burrows and David Wheeler (for the - block sorting transformation), David Wheeler (again, for - the Huffman coder), Peter Fenwick (for the structured cod­ - ing model in the original bzip, and many refinements), and - Alistair Moffat, Radford Neal and Ian Witten (for the - arithmetic coder in the original bzip). I am much - indebted for their help, support and advice. See the man­ - ual in the source distribution for pointers to sources of - documentation. Christian von Roques encouraged me to look - for faster sorting algorithms, so as to speed up compres­ - sion. Bela Lubkin encouraged me to improve the worst-case - compression performance. The bz* scripts are derived from - those of GNU gzip. Many people sent patches, helped with - portability problems, lent machines, gave advice and were - generally helpful. 
- diff --git a/bzip2recover.1 b/bzip2recover.1 new file mode 100644 index 0000000..9f972a0 --- /dev/null +++ b/bzip2recover.1 @@ -0,0 +1 @@ +.so man1/bunzip2.1 diff --git a/bzip2recover.c b/bzip2recover.c index 286873b..68947d9 100644 --- a/bzip2recover.c +++ b/bzip2recover.c @@ -4,6 +4,8 @@ /*--- bzip2recover.c ---*/ /*-----------------------------------------------------------*/ +/*-- Modified for use under GNO by Stephen Heumann --*/ + /*-- This program is bzip2recover, a program to attempt data salvage from damaged files created by the accompanying @@ -56,7 +58,14 @@ #include #include #include +#include +#ifdef __appleiigs__ +#include +#if defined(__GNO__) && defined(__STACK_CHECK__) +#include +#endif +#endif /* This program records bit locations in the file to be recovered. That means that if 64-bit ints are not supported, we will not @@ -74,14 +83,28 @@ #ifdef _MSC_VER typedef unsigned __int64 MaybeUInt64; # define MaybeUInt64_FMT "%I64u" +#else +#ifdef __ORCAC__ + typedef unsigned long MaybeUInt64; +# define MaybeUInt64_FMT "%lu" #else typedef unsigned int MaybeUInt64; # define MaybeUInt64_FMT "%u" #endif #endif +#endif -typedef unsigned int UInt32; -typedef int Int32; +#ifdef __ORCAC__ + typedef unsigned long UInt32; + typedef long Int32; +# define Int32_FMT "%ld" +# define size_t_FMT "%lu" +#else + typedef unsigned int UInt32; + typedef int Int32; +# define Int32_FMT "%d" +# define size_t_FMT "%d" +#endif /* defined __ORCAC__ */ typedef unsigned char UChar; typedef char Char; typedef unsigned char Bool; @@ -143,7 +166,7 @@ void writeError ( void ) void mallocFail ( Int32 n ) { fprintf ( stderr, - "%s: malloc failed on request for %d bytes.\n", + "%s: malloc failed on request for " Int32_FMT " bytes.\n", progName, n ); fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n", progName ); @@ -155,7 +178,7 @@ void mallocFail ( Int32 n ) void tooManyBlocks ( Int32 max_handled_blocks ) { fprintf ( stderr, - "%s: `%s' appears to contain more than %d 
blocks\n", + "%s: `%s' appears to contain more than " Int32_FMT " blocks\n", progName, inFileName, max_handled_blocks ); fprintf ( stderr, "%s: and cannot be handled. To fix, increase\n", @@ -296,8 +319,13 @@ Bool endsInBz2 ( Char* name ) if (n <= 4) return False; return (name[n-4] == '.' && +#ifdef __GNO__ + (name[n-3] == 'b' || name[n-3] == 'B') && + (name[n-2] == 'z' || name[n-2] == 'Z') && +#else name[n-3] == 'b' && name[n-2] == 'z' && +#endif name[n-1] == '2'); } @@ -313,6 +341,10 @@ Bool endsInBz2 ( Char* name ) # define BZ_SPLIT_SYM '/' /* path splitter on Unix platform */ #endif +#ifdef __appleiigs__ +# define BZ_SPLIT_SYM_GS ':' /* possible path splitter on GS/OS */ +#endif + #define BLOCK_HEADER_HI 0x00003141UL #define BLOCK_HEADER_LO 0x59265359UL @@ -323,14 +355,28 @@ Bool endsInBz2 ( Char* name ) would have an uncompressed size of at least 40GB, so the chances are low you'll need to up this. */ +/* STH - Values larger than 5369 (actually a bit less than that) + are useless when MaybeUInt64 is 32 bits. 
+*/ +#ifdef __ORCAC__ +#define BZ_MAX_HANDLED_BLOCKS 5369 +#else #define BZ_MAX_HANDLED_BLOCKS 50000 +#endif +#ifndef __ORCAC__ MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS]; MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS]; MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS]; MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS]; +#else /* if defined __GNO__ */ +MaybeUInt64 *bStart; +MaybeUInt64 *bEnd; +MaybeUInt64 *rbStart; +MaybeUInt64 *rbEnd; +#endif -Int32 main ( Int32 argc, Char** argv ) +int main ( int argc, Char** argv ) { FILE* inFile; FILE* outFile; @@ -341,11 +387,19 @@ Int32 main ( Int32 argc, Char** argv ) UInt32 buffHi, buffLo, blockCRC; Char* p; +#if defined(__GNO__) && defined(__STACK_CHECK__) + __REPORT_STACK(); +#endif + strcpy ( progName, argv[0] ); inFileName[0] = outFileName[0] = 0; - fprintf ( stderr, + fprintf ( stderr, +#ifdef __GNO__ + "bzip2recover 1.0.2gs1: extracts blocks from damaged .bz2 files.\n" ); +#else "bzip2recover 1.0.2: extracts blocks from damaged .bz2 files.\n" ); +#endif if (argc != 2) { fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n", @@ -358,9 +412,11 @@ Int32 main ( Int32 argc, Char** argv ) case 4: fprintf(stderr, "\trestrictions on size of recovered file: 512 MB\n"); +#ifndef __ORCAC__ fprintf(stderr, "\tto circumvent, recompile with MaybeUInt64 as an\n" "\tunsigned 64-bit int.\n"); +#endif break; default: fprintf(stderr, @@ -373,7 +429,7 @@ Int32 main ( Int32 argc, Char** argv ) if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) { fprintf ( stderr, - "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n", + "%s: supplied filename is suspiciously (>= " size_t_FMT " chars) long. Bye!\n", progName, strlen(argv[1]) ); exit(1); } @@ -386,6 +442,21 @@ Int32 main ( Int32 argc, Char** argv ) exit(1); } +/* Allocate big arrays dynamically so we can use small memory model. These aren't + explicitly free()'d anywhere, but exist for the duration of the program. 
*/ +#ifdef __ORCAC__ + bStart = malloc(BZ_MAX_HANDLED_BLOCKS * sizeof(MaybeUInt64)); + bEnd = malloc(BZ_MAX_HANDLED_BLOCKS * sizeof(MaybeUInt64)); + rbStart = malloc(BZ_MAX_HANDLED_BLOCKS * sizeof(MaybeUInt64)); + rbEnd = malloc(BZ_MAX_HANDLED_BLOCKS * sizeof(MaybeUInt64)); + + if ((bStart == NULL) || (bEnd == NULL) || + (rbStart == NULL) || (rbEnd == NULL)) { + fprintf ( stderr, "%s: couldn't allocate enough memory\n", progName ); + exit(1); + } +#endif + bsIn = bsOpenReadStream ( inFile ); fprintf ( stderr, "%s: searching for block boundaries ...\n", progName ); @@ -404,7 +475,7 @@ Int32 main ( Int32 argc, Char** argv ) (bitsRead - bStart[currBlock]) >= 40) { bEnd[currBlock] = bitsRead-1; if (currBlock > 0) - fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT + fprintf ( stderr, " block " Int32_FMT " runs from " MaybeUInt64_FMT " to " MaybeUInt64_FMT " (incomplete)\n", currBlock, bStart[currBlock], bEnd[currBlock] ); } else @@ -426,7 +497,7 @@ Int32 main ( Int32 argc, Char** argv ) } if (currBlock > 0 && (bEnd[currBlock] - bStart[currBlock]) >= 130) { - fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT + fprintf ( stderr, " block " Int32_FMT " runs from " MaybeUInt64_FMT " to " MaybeUInt64_FMT "\n", rbCtr+1, bStart[currBlock], bEnd[currBlock] ); rbStart[rbCtr] = bStart[currBlock]; @@ -496,26 +567,41 @@ Int32 main ( Int32 argc, Char** argv ) if (bitsRead == rbStart[wrBlock]) { /* Create the output file name, correctly handling leading paths. (31.10.2001 by Sergey E. Kusikov) */ + /* Modified by STH to make it work better on GNO. It would still + be confused by files with a '/' character in their names. */ Char* split; Int32 ofs, k; for (k = 0; k < BZ_MAX_FILENAME; k++) outFileName[k] = 0; strcpy (outFileName, inFileName); +#ifdef __appleiigs__ + split = ((strrchr (outFileName, BZ_SPLIT_SYM_GS) > + strrchr (outFileName, BZ_SPLIT_SYM)) ? 
+ strrchr (outFileName, BZ_SPLIT_SYM_GS) : + strrchr (outFileName, BZ_SPLIT_SYM)); +#else split = strrchr (outFileName, BZ_SPLIT_SYM); +#endif /* defined __appleiigs__ */ if (split == NULL) { split = outFileName; } else { ++split; } /* Now split points to the start of the basename. */ - ofs = split - outFileName; + ofs = split - outFileName; +/* On GS, max. block number is < 6000, and ProDOS filenames are short, + so use only four digits in output file name */ +#ifdef __ORCAC__ + sprintf (split, "rec%4ld", wrBlock+1); +#else sprintf (split, "rec%5d", wrBlock+1); +#endif for (p = split; *p != 0; p++) if (*p == ' ') *p = '0'; strcat (outFileName, inFileName + ofs); if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" ); - fprintf ( stderr, " writing block %d to `%s' ...\n", + fprintf ( stderr, " writing block " Int32_FMT " to `%s' ...\n", wrBlock+1, outFileName ); outFile = fopen ( outFileName, "wb" ); @@ -524,6 +610,27 @@ Int32 main ( Int32 argc, Char** argv ) progName, outFileName ); exit(1); } + +#ifdef __appleiigs__ + /* Set filetype to BIN if running on the GS */ + { + static GSString255 fileNameStringGS; + static FileInfoRecGS infoRec = { 4, /* pCount */ + &fileNameStringGS, /* Ptr to file name */ + 0x00C3, /* access restrictions (none) */ + 0x06, /* filetype (BIN) */ + 0x0000 /* auxtype ($0000) */ + }; + + if (strlen( outFileName ) <= 255) { + strncpy( fileNameStringGS.text, outFileName, 255 ); + fileNameStringGS.length = strlen( outFileName ); + SetFileInfo( &infoRec ); + /* Ignoring any errors produced by this call */ + } + } +#endif + bsWr = bsOpenWriteStream ( outFile ); bsPutUChar ( bsWr, BZ_HDR_B ); bsPutUChar ( bsWr, BZ_HDR_Z ); @@ -535,7 +642,7 @@ Int32 main ( Int32 argc, Char** argv ) } } - fprintf ( stderr, "%s: finished\n", progName ); + fprintf ( stderr, "%s: finished\n", progName ); return 0; } diff --git a/bzip2recover.rez b/bzip2recover.rez new file mode 100644 index 0000000..37fc936 --- /dev/null +++ b/bzip2recover.rez @@ -0,0 +1,15 @@ 
+#include "/lang/orca/libraries/rinclude/Types.Rez" + +resource rVersion (0x1, purgeable3, nocrossbank) { + + { 1, 0, 2, /* version 1.0.2 */ + release, /* development|alpha|beta|final|release */ + 0 /* non-final release number */ + }, + verUS, /* country code -- only some are avail */ + "bzip2recover", /* name */ + /* _Very_ brief description. Check "file info" */ + /* shown in the Finder to see if it's too long */ + /* Note that \n is used to separate lines here. */ + "Bzip2 archive recovery program" +}; diff --git a/bzlib.c b/bzlib.c index 7d1cb27..43f552d 100644 --- a/bzlib.c +++ b/bzlib.c @@ -4,6 +4,11 @@ /*--- bzlib.c ---*/ /*-------------------------------------------------------------*/ +/*-- Modified for use under GNO by Stephen Heumann --*/ +#ifdef __ORCAC__ +segment "bzip2"; +#endif + /*-- This file is a part of bzip2 and/or libbzip2, a program and library for lossless, block-sorting data compression. @@ -85,7 +90,18 @@ #ifndef BZ_NO_STDIO void BZ2_bz__AssertH__fail ( int errcode ) { - fprintf(stderr, + fprintf(stderr, +#ifdef __GNO__ + "\n\nbunzip2/libbzip2: internal error number %d.\n" + "This is a bug in bunzip2/libbzip2, %s.\n" + "If you are experiencing it only in the GNO version of bunzip2,\n" + "please report it to me at sheumann@myrealbox.com . If you can\n" + "duplicate it in other versions of bzip2 as well, please report\n" + "it to the original author Julian Seward at jseward@acm.org .\n" + "Please make an effort to report this bug; timely and accurate\n" + "bug reports eventually lead to higher quality software. Thanks.\n" + "Stephen Heumann and Julian Seward.\n\n", +#else "\n\nbzip2/libbzip2: internal error number %d.\n" "This is a bug in bzip2/libbzip2, %s.\n" "Please report it to me at: jseward@acm.org. If this happened\n" @@ -94,10 +110,15 @@ void BZ2_bz__AssertH__fail ( int errcode ) "of that program. Please make an effort to report this bug;\n" "timely and accurate bug reports eventually lead to higher\n" "quality software. Thanks. 
Julian Seward, 30 December 2001.\n\n", +#endif errcode, BZ2_bzlibVersion() ); +#ifndef __ORCAC__ +/* Don't need this for decompression, since error 1007 is only + * produced in the blocksort routines used for compression. + */ if (errcode == 1007) { fprintf(stderr, "\n*** A special note about internal error number 1007 ***\n" @@ -125,6 +146,7 @@ void BZ2_bz__AssertH__fail ( int errcode ) "\n" ); } +#endif exit(3); } @@ -135,9 +157,11 @@ void BZ2_bz__AssertH__fail ( int errcode ) static int bz_config_ok ( void ) { +#ifndef __ORCAC__ if (sizeof(int) != 4) return 0; if (sizeof(short) != 2) return 0; if (sizeof(char) != 1) return 0; +#endif return 1; } @@ -158,6 +182,7 @@ void default_bzfree ( void* opaque, void* addr ) /*---------------------------------------------------*/ +#ifndef __ORCAC__ static void prepare_new_block ( EState* s ) { @@ -169,17 +194,21 @@ void prepare_new_block ( EState* s ) for (i = 0; i < 256; i++) s->inUse[i] = False; s->blockNo++; } +#endif /*---------------------------------------------------*/ +#ifndef __ORCAC__ static void init_RL ( EState* s ) { s->state_in_ch = 256; s->state_in_len = 0; } +#endif +#ifndef __ORCAC__ static Bool isempty_RL ( EState* s ) { @@ -187,9 +216,11 @@ Bool isempty_RL ( EState* s ) return False; else return True; } +#endif /*---------------------------------------------------*/ +#ifndef __ORCAC__ int BZ_API(BZ2_bzCompressInit) ( bz_stream* strm, int blockSize100k, @@ -254,9 +285,11 @@ int BZ_API(BZ2_bzCompressInit) prepare_new_block ( s ); return BZ_OK; } +#endif /*---------------------------------------------------*/ +#ifndef __ORCAC__ static void add_pair_to_block ( EState* s ) { @@ -267,6 +300,29 @@ void add_pair_to_block ( EState* s ) } s->inUse[s->state_in_ch] = True; switch (s->state_in_len) { +#ifdef __ORCAC__ + case 1: + *((s->block)+(s->nblock)) = (UChar)ch; s->nblock++; + break; + case 2: + *((s->block)+(s->nblock)) = (UChar)ch; s->nblock++; + *((s->block)+(s->nblock)) = (UChar)ch; s->nblock++; + break; + 
case 3: + *((s->block)+(s->nblock)) = (UChar)ch; s->nblock++; + *((s->block)+(s->nblock)) = (UChar)ch; s->nblock++; + *((s->block)+(s->nblock)) = (UChar)ch; s->nblock++; + break; + default: + *((s->inUse)+(s->state_in_len-4)) = True; + *((s->block)+(s->nblock)) = (UChar)ch; s->nblock++; + *((s->block)+(s->nblock)) = (UChar)ch; s->nblock++; + *((s->block)+(s->nblock)) = (UChar)ch; s->nblock++; + *((s->block)+(s->nblock)) = (UChar)ch; s->nblock++; + *((s->block)+(s->nblock)) = ((UChar)(s->state_in_len-4)); + s->nblock++; + break; +#else case 1: s->block[s->nblock] = (UChar)ch; s->nblock++; break; @@ -288,20 +344,51 @@ void add_pair_to_block ( EState* s ) s->block[s->nblock] = ((UChar)(s->state_in_len-4)); s->nblock++; break; +#endif } } +#endif /*---------------------------------------------------*/ +#ifndef __ORCAC__ static void flush_RL ( EState* s ) { if (s->state_in_ch < 256) add_pair_to_block ( s ); init_RL ( s ); } +#endif /*---------------------------------------------------*/ +#ifdef __ORCAC__ +#define ADD_CHAR_TO_BLOCK(zs,zchh0) \ +{ \ + UInt32 zchh = (UInt32)(zchh0); \ + /*-- fast track the common case --*/ \ + if (zchh != zs->state_in_ch && \ + zs->state_in_len == 1) { \ + UChar ch = (UChar)(zs->state_in_ch); \ + BZ_UPDATE_CRC( zs->blockCRC, ch ); \ + *((zs->inUse)+(zs->state_in_ch)) = True; \ + *((zs->block)+(zs->nblock)) = (UChar)ch; \ + zs->nblock++; \ + zs->state_in_ch = zchh; \ + } \ + else \ + /*-- general, uncommon cases --*/ \ + if (zchh != zs->state_in_ch || \ + zs->state_in_len == 255) { \ + if (zs->state_in_ch < 256) \ + add_pair_to_block ( zs ); \ + zs->state_in_ch = zchh; \ + zs->state_in_len = 1; \ + } else { \ + zs->state_in_len++; \ + } \ +} +#else #define ADD_CHAR_TO_BLOCK(zs,zchh0) \ { \ UInt32 zchh = (UInt32)(zchh0); \ @@ -327,9 +414,11 @@ void flush_RL ( EState* s ) zs->state_in_len++; \ } \ } +#endif /*---------------------------------------------------*/ +#ifndef __ORCAC__ static Bool copy_input_until_stop ( EState* s ) { @@ -372,9 
+461,11 @@ Bool copy_input_until_stop ( EState* s ) } return progress_in; } +#endif /*---------------------------------------------------*/ +#ifndef __ORCAC__ static Bool copy_output_until_stop ( EState* s ) { @@ -399,9 +490,11 @@ Bool copy_output_until_stop ( EState* s ) return progress_out; } +#endif /*---------------------------------------------------*/ +#ifndef __ORCAC__ static Bool handle_compress ( bz_stream* strm ) { @@ -446,9 +539,11 @@ Bool handle_compress ( bz_stream* strm ) return progress_in || progress_out; } +#endif /*---------------------------------------------------*/ +#ifndef __ORCAC__ int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action ) { Bool progress; @@ -507,9 +602,11 @@ int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action ) } return BZ_OK; /*--not reached--*/ } +#endif /*---------------------------------------------------*/ +#ifndef __ORCAC__ int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm ) { EState* s; @@ -527,6 +624,7 @@ int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm ) return BZ_OK; } +#endif /*---------------------------------------------------*/ @@ -636,12 +734,20 @@ void unRLE_obuf_to_output_FAST ( DState* s ) UInt32* c_tt = s->tt; UInt32 c_tPos = s->tPos; char* cs_next_out = s->strm->next_out; +#ifdef __ORCAC__ + unsigned long cs_avail_out = s->strm->avail_out; +#else unsigned int cs_avail_out = s->strm->avail_out; +#endif /* end restore */ UInt32 avail_out_INIT = cs_avail_out; Int32 s_save_nblockPP = s->save_nblock+1; +#ifdef __ORCAC__ + unsigned long total_out_lo32_old; +#else unsigned int total_out_lo32_old; +#endif while (True) { @@ -845,7 +951,7 @@ int BZ_API(BZ2_bzDecompress) ( bz_stream *strm ) if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) { BZ_FINALISE_CRC ( s->calculatedBlockCRC ); if (s->verbosity >= 3) - VPrintf2 ( " {0x%x, 0x%x}", s->storedBlockCRC, + VPrintf2 ( " {" UInt32_HEXFMT ", " UInt32_HEXFMT "}", s->storedBlockCRC, s->calculatedBlockCRC ); if (s->verbosity >= 2) VPrintf0 ( "]" ); if 
(s->calculatedBlockCRC != s->storedBlockCRC) @@ -863,7 +969,7 @@ int BZ_API(BZ2_bzDecompress) ( bz_stream *strm ) Int32 r = BZ2_decompress ( s ); if (r == BZ_STREAM_END) { if (s->verbosity >= 3) - VPrintf2 ( "\n combined CRCs: stored = 0x%x, computed = 0x%x", + VPrintf2 ( "\n combined CRCs: stored = " UInt32_HEXFMT ", computed = " UInt32_HEXFMT, s->storedCombinedCRC, s->calculatedCombinedCRC ); if (s->calculatedCombinedCRC != s->storedCombinedCRC) return BZ_DATA_ERROR; @@ -934,6 +1040,7 @@ static Bool myfeof ( FILE* f ) /*---------------------------------------------------*/ +#ifndef __ORCAC__ BZFILE* BZ_API(BZ2_bzWriteOpen) ( int* bzerror, FILE* f, @@ -978,15 +1085,21 @@ BZFILE* BZ_API(BZ2_bzWriteOpen) bzf->initialisedOk = True; return bzf; } +#endif /*---------------------------------------------------*/ +#ifndef __ORCAC__ void BZ_API(BZ2_bzWrite) ( int* bzerror, BZFILE* b, - void* buf, + void* buf, +#ifdef __ORCAC__ + long len ) +#else int len ) +#endif { Int32 n, n2, ret; bzFile* bzf = (bzFile*)b; @@ -1024,29 +1137,45 @@ void BZ_API(BZ2_bzWrite) { BZ_SETERR(BZ_OK); return; }; } } +#endif /*---------------------------------------------------*/ +#ifndef __ORCAC__ void BZ_API(BZ2_bzWriteClose) ( int* bzerror, BZFILE* b, int abandon, +#ifdef __ORCAC__ + unsigned long* nbytes_in, + unsigned long* nbytes_out ) +#else unsigned int* nbytes_in, unsigned int* nbytes_out ) +#endif { BZ2_bzWriteClose64 ( bzerror, b, abandon, nbytes_in, NULL, nbytes_out, NULL ); } +#endif +#ifndef __ORCAC__ void BZ_API(BZ2_bzWriteClose64) ( int* bzerror, BZFILE* b, int abandon, +#ifdef __ORCAC__ + unsigned long* nbytes_in_lo32, + unsigned long* nbytes_in_hi32, + unsigned long* nbytes_out_lo32, + unsigned long* nbytes_out_hi32 ) +#else unsigned int* nbytes_in_lo32, unsigned int* nbytes_in_hi32, unsigned int* nbytes_out_lo32, unsigned int* nbytes_out_hi32 ) +#endif { Int32 n, n2, ret; bzFile* bzf = (bzFile*)b; @@ -1102,6 +1231,7 @@ void BZ_API(BZ2_bzWriteClose64) BZ2_bzCompressEnd ( 
&(bzf->strm) ); free ( bzf ); } +#endif /*---------------------------------------------------*/ @@ -1179,11 +1309,19 @@ void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b ) /*---------------------------------------------------*/ +#ifdef __ORCAC__ +long BZ_API(BZ2_bzRead) +#else int BZ_API(BZ2_bzRead) +#endif ( int* bzerror, BZFILE* b, - void* buf, + void* buf, +#ifdef __ORCAC__ + long len ) +#else int len ) +#endif { Int32 n, ret; bzFile* bzf = (bzFile*)b; @@ -1265,11 +1403,20 @@ void BZ_API(BZ2_bzReadGetUnused) /*---------------------------------------------------*/ /*---------------------------------------------------*/ +#ifndef __ORCAC__ int BZ_API(BZ2_bzBuffToBuffCompress) - ( char* dest, + ( char* dest, +#ifdef __ORCAC__ + unsigned long* destLen, +#else unsigned int* destLen, +#endif char* source, +#ifdef __ORCAC__ + unsigned long sourceLen, +#else unsigned int sourceLen, +#endif int blockSize100k, int verbosity, int workFactor ) @@ -1314,14 +1461,23 @@ int BZ_API(BZ2_bzBuffToBuffCompress) BZ2_bzCompressEnd ( &strm ); return ret; } +#endif /*---------------------------------------------------*/ int BZ_API(BZ2_bzBuffToBuffDecompress) ( char* dest, +#ifdef __ORCAC__ + unsigned long* destLen, +#else unsigned int* destLen, +#endif char* source, +#ifdef __ORCAC__ + unsigned long sourceLen, +#else unsigned int sourceLen, +#endif int small, int verbosity ) { @@ -1390,7 +1546,9 @@ const char * BZ_API(BZ2_bzlibVersion)(void) return BZ_VERSION; } - +/* This stuff is disabled because it may be broken under GNO due to + 16-bit ints. It has not been modified to use longs where needed. 
*/ +#ifndef __ORCAC__ #ifndef BZ_NO_STDIO /*---------------------------------------------------*/ @@ -1586,6 +1744,7 @@ const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum) return bzerrorstrings[err*-1]; } #endif +#endif /* not defined __ORCAC__ */ /*-------------------------------------------------------------*/ diff --git a/bzlib.h b/bzlib.h index 9ac43a1..ffd0cd1 100644 --- a/bzlib.h +++ b/bzlib.h @@ -85,6 +85,27 @@ extern "C" { #define BZ_OUTBUFF_FULL (-8) #define BZ_CONFIG_ERROR (-9) +#ifdef __ORCAC__ +typedef + struct { + char *next_in; + unsigned long avail_in; + unsigned long total_in_lo32; + unsigned long total_in_hi32; + + char *next_out; + unsigned long avail_out; + unsigned long total_out_lo32; + unsigned long total_out_hi32; + + void *state; + + void *(*bzalloc)(void *,long,long); + void (*bzfree)(void *,void *); + void *opaque; + } + bz_stream; +#else typedef struct { char *next_in; @@ -104,6 +125,7 @@ typedef void *opaque; } bz_stream; +#endif #ifndef BZ_IMPORT @@ -195,12 +217,21 @@ BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) ( int* nUnused ); +#ifdef __ORCAC__ +BZ_EXTERN long BZ_API(BZ2_bzRead) ( + int* bzerror, + BZFILE* b, + void* buf, + long len + ); +#else BZ_EXTERN int BZ_API(BZ2_bzRead) ( int* bzerror, BZFILE* b, void* buf, int len ); +#endif BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) ( int* bzerror, @@ -210,13 +241,31 @@ BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) ( int workFactor ); +#ifdef __ORCAC__ +BZ_EXTERN void BZ_API(BZ2_bzWrite) ( + int* bzerror, + BZFILE* b, + void* buf, + long len + ); +#else BZ_EXTERN void BZ_API(BZ2_bzWrite) ( int* bzerror, BZFILE* b, void* buf, int len ); +#endif +#ifdef __ORCAC__ +BZ_EXTERN void BZ_API(BZ2_bzWriteClose) ( + int* bzerror, + BZFILE* b, + int abandon, + unsigned long* nbytes_in, + unsigned long* nbytes_out + ); +#else BZ_EXTERN void BZ_API(BZ2_bzWriteClose) ( int* bzerror, BZFILE* b, @@ -224,7 +273,19 @@ BZ_EXTERN void BZ_API(BZ2_bzWriteClose) ( unsigned int* nbytes_in, unsigned int* nbytes_out ); 
+#endif +#ifdef __ORCAC__ +BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) ( + int* bzerror, + BZFILE* b, + int abandon, + unsigned long* nbytes_in_lo32, + unsigned long* nbytes_in_hi32, + unsigned long* nbytes_out_lo32, + unsigned long* nbytes_out_hi32 + ); +#else BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) ( int* bzerror, BZFILE* b, @@ -235,10 +296,31 @@ BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) ( unsigned int* nbytes_out_hi32 ); #endif +#endif /*-- Utility functions --*/ +#ifdef __ORCAC__ +BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) ( + char* dest, + unsigned long* destLen, + char* source, + unsigned long sourceLen, + int blockSize100k, + int verbosity, + int workFactor + ); + +BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) ( + char* dest, + unsigned long* destLen, + char* source, + unsigned long sourceLen, + int small, + int verbosity + ); +#else BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) ( char* dest, unsigned int* destLen, @@ -257,6 +339,7 @@ BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) ( int small, int verbosity ); +#endif /*-- @@ -273,6 +356,9 @@ BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) ( void ); +/* This stuff is disabled because it may be broken under GNO due to + 16-bit ints. It has not been modified to use longs where needed. */ +#ifndef __ORCAC__ #ifndef BZ_NO_STDIO BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) ( const char *path, @@ -309,6 +395,7 @@ BZ_EXTERN const char * BZ_API(BZ2_bzerror) ( int *errnum ); #endif +#endif /* not defined __ORCAC__ */ #ifdef __cplusplus } diff --git a/bzlib_private.h b/bzlib_private.h index ff973c3..7320529 100644 --- a/bzlib_private.h +++ b/bzlib_private.h @@ -4,6 +4,8 @@ /*--- bzlib_private.h ---*/ /*-------------------------------------------------------------*/ +/*-- Modified for use under GNO by Stephen Heumann --*/ + /*-- This file is a part of bzip2 and/or libbzip2, a program and library for lossless, block-sorting data compression. @@ -76,13 +78,30 @@ /*-- General stuff. 
--*/ +#ifdef __GNO__ +#define BZ_VERSION "1.0.2gs1, 07-Jun-2003" +#else #define BZ_VERSION "1.0.2, 30-Dec-2001" +#endif typedef char Char; typedef unsigned char Bool; typedef unsigned char UChar; -typedef int Int32; -typedef unsigned int UInt32; +#ifdef __ORCAC__ + typedef long Int32; + typedef unsigned long UInt32; +# define Int32_FMT "%ld" +# define UInt32_HEX8FMT "0x%8lx" +# define UInt32_HEXFMT "0x%lx" +# define Int32_6FMT "%6ld" +#else + typedef int Int32; + typedef unsigned int UInt32; +# define Int32_FMT "%d" +# define UInt32_HEX8FMT "0x%8x" +# define UInt32_HEXFMT "0x%x" +# define Int32_6FMT "%6d" +#endif /* defined __ORCAC__ */ typedef short Int16; typedef unsigned short UInt16; @@ -162,7 +181,11 @@ extern void bz_internal_error ( int errcode ); /*-- Stuff for randomising repetitive blocks. --*/ +#ifdef __ORCAC__ +extern Int16 BZ2_rNums[512]; +#else extern Int32 BZ2_rNums[512]; +#endif #define BZ_RAND_DECLS \ Int32 rNToGo; \ diff --git a/bzmore b/bzmore deleted file mode 100644 index d314043..0000000 --- a/bzmore +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/sh - -# Bzmore wrapped for bzip2, -# adapted from zmore by Philippe Troin for Debian GNU/Linux. - -PATH="/usr/bin:$PATH"; export PATH - -prog=`echo $0 | sed 's|.*/||'` -case "$prog" in - *less) more=less ;; - *) more=more ;; -esac - -if test "`echo -n a`" = "-n a"; then - # looks like a SysV system: - n1=''; n2='\c' -else - n1='-n'; n2='' -fi -oldtty=`stty -g 2>/dev/null` -if stty -cbreak 2>/dev/null; then - cb='cbreak'; ncb='-cbreak' -else - # 'stty min 1' resets eof to ^a on both SunOS and SysV! - cb='min 1 -icanon'; ncb='icanon eof ^d' -fi -if test $? -eq 0 -a -n "$oldtty"; then - trap 'stty $oldtty 2>/dev/null; exit' 0 2 3 5 10 13 15 -else - trap 'stty $ncb echo 2>/dev/null; exit' 0 2 3 5 10 13 15 -fi - -if test $# = 0; then - if test -t 0; then - echo usage: $prog files... 
- else - bzip2 -cdfq | eval $more - fi -else - FIRST=1 - for FILE - do - if test $FIRST -eq 0; then - echo $n1 "--More--(Next file: $FILE)$n2" - stty $cb -echo 2>/dev/null - ANS=`dd bs=1 count=1 2>/dev/null` - stty $ncb echo 2>/dev/null - echo " " - if test "$ANS" = 'e' -o "$ANS" = 'q'; then - exit - fi - fi - if test "$ANS" != 's'; then - echo "------> $FILE <------" - bzip2 -cdfq "$FILE" | eval $more - fi - if test -t; then - FIRST=0 - fi - done -fi diff --git a/bzmore.1 b/bzmore.1 deleted file mode 100644 index b437d3b..0000000 --- a/bzmore.1 +++ /dev/null @@ -1,152 +0,0 @@ -.\"Shamelessly copied from zmore.1 by Philippe Troin -.\"for Debian GNU/Linux -.TH BZMORE 1 -.SH NAME -bzmore, bzless \- file perusal filter for crt viewing of bzip2 compressed text -.SH SYNOPSIS -.B bzmore -[ name ... ] -.br -.B bzless -[ name ... ] -.SH NOTE -In the following description, -.I bzless -and -.I less -can be used interchangeably with -.I bzmore -and -.I more. -.SH DESCRIPTION -.I Bzmore -is a filter which allows examination of compressed or plain text files -one screenful at a time on a soft-copy terminal. -.I bzmore -works on files compressed with -.I bzip2 -and also on uncompressed files. -If a file does not exist, -.I bzmore -looks for a file of the same name with the addition of a .bz2 suffix. -.PP -.I Bzmore -normally pauses after each screenful, printing --More-- -at the bottom of the screen. -If the user then types a carriage return, one more line is displayed. -If the user hits a space, -another screenful is displayed. Other possibilities are enumerated later. -.PP -.I Bzmore -looks in the file -.I /etc/termcap -to determine terminal characteristics, -and to determine the default window size. -On a terminal capable of displaying 24 lines, -the default window size is 22 lines. 
-Other sequences which may be typed when -.I bzmore -pauses, and their effects, are as follows (\fIi\fP is an optional integer -argument, defaulting to 1) : -.PP -.IP \fIi\|\fP -display -.I i -more lines, (or another screenful if no argument is given) -.PP -.IP ^D -display 11 more lines (a ``scroll''). -If -.I i -is given, then the scroll size is set to \fIi\|\fP. -.PP -.IP d -same as ^D (control-D) -.PP -.IP \fIi\|\fPz -same as typing a space except that \fIi\|\fP, if present, becomes the new -window size. Note that the window size reverts back to the default at the -end of the current file. -.PP -.IP \fIi\|\fPs -skip \fIi\|\fP lines and print a screenful of lines -.PP -.IP \fIi\|\fPf -skip \fIi\fP screenfuls and print a screenful of lines -.PP -.IP "q or Q" -quit reading the current file; go on to the next (if any) -.PP -.IP "e or q" -When the prompt --More--(Next file: -.IR file ) -is printed, this command causes bzmore to exit. -.PP -.IP s -When the prompt --More--(Next file: -.IR file ) -is printed, this command causes bzmore to skip the next file and continue. -.PP -.IP = -Display the current line number. -.PP -.IP \fIi\|\fP/expr -search for the \fIi\|\fP-th occurrence of the regular expression \fIexpr.\fP -If the pattern is not found, -.I bzmore -goes on to the next file (if any). -Otherwise, a screenful is displayed, starting two lines before the place -where the expression was found. -The user's erase and kill characters may be used to edit the regular -expression. -Erasing back past the first column cancels the search command. -.PP -.IP \fIi\|\fPn -search for the \fIi\|\fP-th occurrence of the last regular expression entered. -.PP -.IP !command -invoke a shell with \fIcommand\|\fP. -The character `!' in "command" are replaced with the -previous shell command. The sequence "\\!" is replaced by "!". -.PP -.IP ":q or :Q" -quit reading the current file; go on to the next (if any) -(same as q or Q). -.PP -.IP . -(dot) repeat the previous command. 
-.PP -The commands take effect immediately, i.e., it is not necessary to -type a carriage return. -Up to the time when the command character itself is given, -the user may hit the line kill character to cancel the numerical -argument being formed. -In addition, the user may hit the erase character to redisplay the ---More-- message. -.PP -At any time when output is being sent to the terminal, the user can -hit the quit key (normally control\-\\). -.I Bzmore -will stop sending output, and will display the usual --More-- -prompt. -The user may then enter one of the above commands in the normal manner. -Unfortunately, some output is lost when this is done, due to the -fact that any characters waiting in the terminal's output queue -are flushed when the quit signal occurs. -.PP -The terminal is set to -.I noecho -mode by this program so that the output can be continuous. -What you type will thus not show on your terminal, except for the / and ! -commands. -.PP -If the standard output is not a teletype, then -.I bzmore -acts just like -.I bzcat, -except that a header is printed before each file. -.SH FILES -.DT -/etc/termcap Terminal data base -.SH "SEE ALSO" -more(1), less(1), bzip2(1), bzdiff(1), bzgrep(1) diff --git a/compress.c b/compress.c deleted file mode 100644 index 56501c1..0000000 --- a/compress.c +++ /dev/null @@ -1,714 +0,0 @@ - -/*-------------------------------------------------------------*/ -/*--- Compression machinery (not incl block sorting) ---*/ -/*--- compress.c ---*/ -/*-------------------------------------------------------------*/ - -/*-- - This file is a part of bzip2 and/or libbzip2, a program and - library for lossless, block-sorting data compression. - - Copyright (C) 1996-2002 Julian R Seward. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. 
Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment in the product - documentation would be appreciated but is not required. - - 3. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 4. The name of the author may not be used to endorse or promote - products derived from this software without specific prior written - permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS - OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE - GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - Julian Seward, Cambridge, UK. - jseward@acm.org - bzip2/libbzip2 version 1.0 of 21 March 2000 - - This program is based on (at least) the work of: - Mike Burrows - David Wheeler - Peter Fenwick - Alistair Moffat - Radford Neal - Ian H. Witten - Robert Sedgewick - Jon L. Bentley - - For more information on these sources, see the manual. ---*/ - -/*-- - CHANGES - ~~~~~~~ - 0.9.0 -- original version. - - 0.9.0a/b -- no changes in this file. 
- - 0.9.0c - * changed setting of nGroups in sendMTFValues() so as to - do a bit better on small files ---*/ - -#include "bzlib_private.h" - - -/*---------------------------------------------------*/ -/*--- Bit stream I/O ---*/ -/*---------------------------------------------------*/ - -/*---------------------------------------------------*/ -void BZ2_bsInitWrite ( EState* s ) -{ - s->bsLive = 0; - s->bsBuff = 0; -} - - -/*---------------------------------------------------*/ -static -void bsFinishWrite ( EState* s ) -{ - while (s->bsLive > 0) { - s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24); - s->numZ++; - s->bsBuff <<= 8; - s->bsLive -= 8; - } -} - - -/*---------------------------------------------------*/ -#define bsNEEDW(nz) \ -{ \ - while (s->bsLive >= 8) { \ - s->zbits[s->numZ] \ - = (UChar)(s->bsBuff >> 24); \ - s->numZ++; \ - s->bsBuff <<= 8; \ - s->bsLive -= 8; \ - } \ -} - - -/*---------------------------------------------------*/ -static -__inline__ -void bsW ( EState* s, Int32 n, UInt32 v ) -{ - bsNEEDW ( n ); - s->bsBuff |= (v << (32 - s->bsLive - n)); - s->bsLive += n; -} - - -/*---------------------------------------------------*/ -static -void bsPutUInt32 ( EState* s, UInt32 u ) -{ - bsW ( s, 8, (u >> 24) & 0xffL ); - bsW ( s, 8, (u >> 16) & 0xffL ); - bsW ( s, 8, (u >> 8) & 0xffL ); - bsW ( s, 8, u & 0xffL ); -} - - -/*---------------------------------------------------*/ -static -void bsPutUChar ( EState* s, UChar c ) -{ - bsW( s, 8, (UInt32)c ); -} - - -/*---------------------------------------------------*/ -/*--- The back end proper ---*/ -/*---------------------------------------------------*/ - -/*---------------------------------------------------*/ -static -void makeMaps_e ( EState* s ) -{ - Int32 i; - s->nInUse = 0; - for (i = 0; i < 256; i++) - if (s->inUse[i]) { - s->unseqToSeq[i] = s->nInUse; - s->nInUse++; - } -} - - -/*---------------------------------------------------*/ -static -void generateMTFValues ( EState* s ) -{ - UChar 
yy[256]; - Int32 i, j; - Int32 zPend; - Int32 wr; - Int32 EOB; - - /* - After sorting (eg, here), - s->arr1 [ 0 .. s->nblock-1 ] holds sorted order, - and - ((UChar*)s->arr2) [ 0 .. s->nblock-1 ] - holds the original block data. - - The first thing to do is generate the MTF values, - and put them in - ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ]. - Because there are strictly fewer or equal MTF values - than block values, ptr values in this area are overwritten - with MTF values only when they are no longer needed. - - The final compressed bitstream is generated into the - area starting at - (UChar*) (&((UChar*)s->arr2)[s->nblock]) - - These storage aliases are set up in bzCompressInit(), - except for the last one, which is arranged in - compressBlock(). - */ - UInt32* ptr = s->ptr; - UChar* block = s->block; - UInt16* mtfv = s->mtfv; - - makeMaps_e ( s ); - EOB = s->nInUse+1; - - for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0; - - wr = 0; - zPend = 0; - for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i; - - for (i = 0; i < s->nblock; i++) { - UChar ll_i; - AssertD ( wr <= i, "generateMTFValues(1)" ); - j = ptr[i]-1; if (j < 0) j += s->nblock; - ll_i = s->unseqToSeq[block[j]]; - AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); - - if (yy[0] == ll_i) { - zPend++; - } else { - - if (zPend > 0) { - zPend--; - while (True) { - if (zPend & 1) { - mtfv[wr] = BZ_RUNB; wr++; - s->mtfFreq[BZ_RUNB]++; - } else { - mtfv[wr] = BZ_RUNA; wr++; - s->mtfFreq[BZ_RUNA]++; - } - if (zPend < 2) break; - zPend = (zPend - 2) / 2; - }; - zPend = 0; - } - { - register UChar rtmp; - register UChar* ryy_j; - register UChar rll_i; - rtmp = yy[1]; - yy[1] = yy[0]; - ryy_j = &(yy[1]); - rll_i = ll_i; - while ( rll_i != rtmp ) { - register UChar rtmp2; - ryy_j++; - rtmp2 = rtmp; - rtmp = *ryy_j; - *ryy_j = rtmp2; - }; - yy[0] = rtmp; - j = ryy_j - &(yy[0]); - mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; - } - - } - } - - if (zPend > 0) { - zPend--; - while (True) { - if (zPend & 1) { - mtfv[wr] = 
BZ_RUNB; wr++; - s->mtfFreq[BZ_RUNB]++; - } else { - mtfv[wr] = BZ_RUNA; wr++; - s->mtfFreq[BZ_RUNA]++; - } - if (zPend < 2) break; - zPend = (zPend - 2) / 2; - }; - zPend = 0; - } - - mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++; - - s->nMTF = wr; -} - - -/*---------------------------------------------------*/ -#define BZ_LESSER_ICOST 0 -#define BZ_GREATER_ICOST 15 - -static -void sendMTFValues ( EState* s ) -{ - Int32 v, t, i, j, gs, ge, totc, bt, bc, iter; - Int32 nSelectors, alphaSize, minLen, maxLen, selCtr; - Int32 nGroups, nBytes; - - /*-- - UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - is a global since the decoder also needs it. - - Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - are also globals only used in this proc. - Made global to keep stack frame size small. - --*/ - - - UInt16 cost[BZ_N_GROUPS]; - Int32 fave[BZ_N_GROUPS]; - - UInt16* mtfv = s->mtfv; - - if (s->verbosity >= 3) - VPrintf3( " %d in block, %d after MTF & 1-2 coding, " - "%d+2 syms in use\n", - s->nblock, s->nMTF, s->nInUse ); - - alphaSize = s->nInUse+2; - for (t = 0; t < BZ_N_GROUPS; t++) - for (v = 0; v < alphaSize; v++) - s->len[t][v] = BZ_GREATER_ICOST; - - /*--- Decide how many coding tables to use ---*/ - AssertH ( s->nMTF > 0, 3001 ); - if (s->nMTF < 200) nGroups = 2; else - if (s->nMTF < 600) nGroups = 3; else - if (s->nMTF < 1200) nGroups = 4; else - if (s->nMTF < 2400) nGroups = 5; else - nGroups = 6; - - /*--- Generate an initial set of coding tables ---*/ - { - Int32 nPart, remF, tFreq, aFreq; - - nPart = nGroups; - remF = s->nMTF; - gs = 0; - while (nPart > 0) { - tFreq = remF / nPart; - ge = gs-1; - aFreq = 0; - while (aFreq < tFreq && ge < alphaSize-1) { - ge++; - aFreq += s->mtfFreq[ge]; - } - - if (ge > gs - && nPart != nGroups && nPart != 1 - && ((nGroups-nPart) % 2 == 1)) { - aFreq -= s->mtfFreq[ge]; - ge--; - } - - if (s->verbosity >= 3) - VPrintf5( " initial group %d, [%d .. 
%d], " - "has %d syms (%4.1f%%)\n", - nPart, gs, ge, aFreq, - (100.0 * (float)aFreq) / (float)(s->nMTF) ); - - for (v = 0; v < alphaSize; v++) - if (v >= gs && v <= ge) - s->len[nPart-1][v] = BZ_LESSER_ICOST; else - s->len[nPart-1][v] = BZ_GREATER_ICOST; - - nPart--; - gs = ge+1; - remF -= aFreq; - } - } - - /*--- - Iterate up to BZ_N_ITERS times to improve the tables. - ---*/ - for (iter = 0; iter < BZ_N_ITERS; iter++) { - - for (t = 0; t < nGroups; t++) fave[t] = 0; - - for (t = 0; t < nGroups; t++) - for (v = 0; v < alphaSize; v++) - s->rfreq[t][v] = 0; - - /*--- - Set up an auxiliary length table which is used to fast-track - the common case (nGroups == 6). - ---*/ - if (nGroups == 6) { - for (v = 0; v < alphaSize; v++) { - s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; - s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; - s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; - } - } - - nSelectors = 0; - totc = 0; - gs = 0; - while (True) { - - /*--- Set group start & end marks. --*/ - if (gs >= s->nMTF) break; - ge = gs + BZ_G_SIZE - 1; - if (ge >= s->nMTF) ge = s->nMTF-1; - - /*-- - Calculate the cost of this group as coded - by each of the coding tables. 
- --*/ - for (t = 0; t < nGroups; t++) cost[t] = 0; - - if (nGroups == 6 && 50 == ge-gs+1) { - /*--- fast track the common case ---*/ - register UInt32 cost01, cost23, cost45; - register UInt16 icv; - cost01 = cost23 = cost45 = 0; - -# define BZ_ITER(nn) \ - icv = mtfv[gs+(nn)]; \ - cost01 += s->len_pack[icv][0]; \ - cost23 += s->len_pack[icv][1]; \ - cost45 += s->len_pack[icv][2]; \ - - BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); - BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); - BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); - BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); - BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); - BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); - BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); - BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); - BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); - BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); - -# undef BZ_ITER - - cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; - cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; - cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16; - - } else { - /*--- slow version which correctly handles all situations ---*/ - for (i = gs; i <= ge; i++) { - UInt16 icv = mtfv[i]; - for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; - } - } - - /*-- - Find the coding table which is best for this group, - and record its identity in the selector table. - --*/ - bc = 999999999; bt = -1; - for (t = 0; t < nGroups; t++) - if (cost[t] < bc) { bc = cost[t]; bt = t; }; - totc += bc; - fave[bt]++; - s->selector[nSelectors] = bt; - nSelectors++; - - /*-- - Increment the symbol frequencies for the selected table. 
- --*/ - if (nGroups == 6 && 50 == ge-gs+1) { - /*--- fast track the common case ---*/ - -# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++ - - BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); - BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); - BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); - BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); - BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); - BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); - BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); - BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); - BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); - BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); - -# undef BZ_ITUR - - } else { - /*--- slow version which correctly handles all situations ---*/ - for (i = gs; i <= ge; i++) - s->rfreq[bt][ mtfv[i] ]++; - } - - gs = ge+1; - } - if (s->verbosity >= 3) { - VPrintf2 ( " pass %d: size is %d, grp uses are ", - iter+1, totc/8 ); - for (t = 0; t < nGroups; t++) - VPrintf1 ( "%d ", fave[t] ); - VPrintf0 ( "\n" ); - } - - /*-- - Recompute the tables based on the accumulated frequencies. - --*/ - for (t = 0; t < nGroups; t++) - BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), - alphaSize, 20 ); - } - - - AssertH( nGroups < 8, 3002 ); - AssertH( nSelectors < 32768 && - nSelectors <= (2 + (900000 / BZ_G_SIZE)), - 3003 ); - - - /*--- Compute MTF values for the selectors. ---*/ - { - UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp; - for (i = 0; i < nGroups; i++) pos[i] = i; - for (i = 0; i < nSelectors; i++) { - ll_i = s->selector[i]; - j = 0; - tmp = pos[j]; - while ( ll_i != tmp ) { - j++; - tmp2 = tmp; - tmp = pos[j]; - pos[j] = tmp2; - }; - pos[0] = tmp; - s->selectorMtf[i] = j; - } - }; - - /*--- Assign actual codes for the tables. 
--*/ - for (t = 0; t < nGroups; t++) { - minLen = 32; - maxLen = 0; - for (i = 0; i < alphaSize; i++) { - if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; - if (s->len[t][i] < minLen) minLen = s->len[t][i]; - } - AssertH ( !(maxLen > 20), 3004 ); - AssertH ( !(minLen < 1), 3005 ); - BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), - minLen, maxLen, alphaSize ); - } - - /*--- Transmit the mapping table. ---*/ - { - Bool inUse16[16]; - for (i = 0; i < 16; i++) { - inUse16[i] = False; - for (j = 0; j < 16; j++) - if (s->inUse[i * 16 + j]) inUse16[i] = True; - } - - nBytes = s->numZ; - for (i = 0; i < 16; i++) - if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0); - - for (i = 0; i < 16; i++) - if (inUse16[i]) - for (j = 0; j < 16; j++) { - if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0); - } - - if (s->verbosity >= 3) - VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes ); - } - - /*--- Now the selectors. ---*/ - nBytes = s->numZ; - bsW ( s, 3, nGroups ); - bsW ( s, 15, nSelectors ); - for (i = 0; i < nSelectors; i++) { - for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1); - bsW(s,1,0); - } - if (s->verbosity >= 3) - VPrintf1( "selectors %d, ", s->numZ-nBytes ); - - /*--- Now the coding tables. 
---*/ - nBytes = s->numZ; - - for (t = 0; t < nGroups; t++) { - Int32 curr = s->len[t][0]; - bsW ( s, 5, curr ); - for (i = 0; i < alphaSize; i++) { - while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ }; - while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ }; - bsW ( s, 1, 0 ); - } - } - - if (s->verbosity >= 3) - VPrintf1 ( "code lengths %d, ", s->numZ-nBytes ); - - /*--- And finally, the block data proper ---*/ - nBytes = s->numZ; - selCtr = 0; - gs = 0; - while (True) { - if (gs >= s->nMTF) break; - ge = gs + BZ_G_SIZE - 1; - if (ge >= s->nMTF) ge = s->nMTF-1; - AssertH ( s->selector[selCtr] < nGroups, 3006 ); - - if (nGroups == 6 && 50 == ge-gs+1) { - /*--- fast track the common case ---*/ - UInt16 mtfv_i; - UChar* s_len_sel_selCtr - = &(s->len[s->selector[selCtr]][0]); - Int32* s_code_sel_selCtr - = &(s->code[s->selector[selCtr]][0]); - -# define BZ_ITAH(nn) \ - mtfv_i = mtfv[gs+(nn)]; \ - bsW ( s, \ - s_len_sel_selCtr[mtfv_i], \ - s_code_sel_selCtr[mtfv_i] ) - - BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); - BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); - BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); - BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); - BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); - BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); - BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); - BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); - BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); - BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); - -# undef BZ_ITAH - - } else { - /*--- slow version which correctly handles all situations ---*/ - for (i = gs; i <= ge; i++) { - bsW ( s, - s->len [s->selector[selCtr]] [mtfv[i]], - s->code [s->selector[selCtr]] [mtfv[i]] ); - } - } - - - gs = ge+1; - selCtr++; - } - AssertH( selCtr == nSelectors, 3007 ); - - if (s->verbosity >= 3) - 
VPrintf1( "codes %d\n", s->numZ-nBytes ); -} - - -/*---------------------------------------------------*/ -void BZ2_compressBlock ( EState* s, Bool is_last_block ) -{ - if (s->nblock > 0) { - - BZ_FINALISE_CRC ( s->blockCRC ); - s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31); - s->combinedCRC ^= s->blockCRC; - if (s->blockNo > 1) s->numZ = 0; - - if (s->verbosity >= 2) - VPrintf4( " block %d: crc = 0x%8x, " - "combined CRC = 0x%8x, size = %d\n", - s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); - - BZ2_blockSort ( s ); - } - - s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]); - - /*-- If this is the first block, create the stream header. --*/ - if (s->blockNo == 1) { - BZ2_bsInitWrite ( s ); - bsPutUChar ( s, BZ_HDR_B ); - bsPutUChar ( s, BZ_HDR_Z ); - bsPutUChar ( s, BZ_HDR_h ); - bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) ); - } - - if (s->nblock > 0) { - - bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 ); - bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 ); - bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 ); - - /*-- Now the block's CRC, so it is in a known place. --*/ - bsPutUInt32 ( s, s->blockCRC ); - - /*-- - Now a single bit indicating (non-)randomisation. - As of version 0.9.5, we use a better sorting algorithm - which makes randomisation unnecessary. So always set - the randomised bit to 'no'. Of course, the decoder - still needs to be able to handle randomised blocks - so as to maintain backwards compatibility with - older versions of bzip2. - --*/ - bsW(s,1,0); - - bsW ( s, 24, s->origPtr ); - generateMTFValues ( s ); - sendMTFValues ( s ); - } - - - /*-- If this is the last block, add the stream trailer. 
--*/ - if (is_last_block) { - - bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 ); - bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 ); - bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 ); - bsPutUInt32 ( s, s->combinedCRC ); - if (s->verbosity >= 2) - VPrintf1( " final combined CRC = 0x%x\n ", s->combinedCRC ); - bsFinishWrite ( s ); - } -} - - -/*-------------------------------------------------------------*/ -/*--- end compress.c ---*/ -/*-------------------------------------------------------------*/ diff --git a/decompress.c b/decompress.c index e921347..66b5ef8 100644 --- a/decompress.c +++ b/decompress.c @@ -4,6 +4,11 @@ /*--- decompress.c ---*/ /*-------------------------------------------------------------*/ +/*-- Modified for use under GNO by Stephen Heumann --*/ +#ifdef __ORCAC__ +segment "decompress", dynamic; +#endif + /*-- This file is a part of bzip2 and/or libbzip2, a program and library for lossless, block-sorting data compression. @@ -80,6 +85,36 @@ void makeMaps_d ( DState* s ) #define RETURN(rrr) \ { retVal = rrr; goto save_state_and_return; }; +#ifdef __ORCAC__ +void getBitsOrcaHack(DState *s) { + s->bsBuff \ + = (s->bsBuff << 8) | \ + ((UInt32) \ + (*((UChar*)(s->strm->next_in)))); \ + s->bsLive += 8; \ + s->strm->next_in++; \ + s->strm->avail_in--; \ + s->strm->total_in_lo32++; \ + if (s->strm->total_in_lo32 == 0) \ + s->strm->total_in_hi32++; \ + } + +#define GET_BITS(lll,vvv,nnn) \ + case lll: s->state = lll; \ + while (True) { \ + if (s->bsLive >= nnn) { \ + UInt32 v; \ + v = (s->bsBuff >> \ + (s->bsLive-nnn)) & ((1 << nnn)-1); \ + s->bsLive -= nnn; \ + vvv = v; \ + break; \ + } \ + if (s->strm->avail_in == 0) RETURN(BZ_OK); \ + getBitsOrcaHack(s); \ + } + +#else #define GET_BITS(lll,vvv,nnn) \ case lll: s->state = lll; \ while (True) { \ @@ -103,6 +138,7 @@ void makeMaps_d ( DState* s ) if (s->strm->total_in_lo32 == 0) \ s->strm->total_in_hi32++; \ } +#endif #define GET_UCHAR(lll,uuu) \ GET_BITS(lll,uuu,8) @@ -141,7 +177,6 @@ void makeMaps_d ( 
DState* s ) lval = gPerm[zvec - gBase[zn]]; \ } - /*---------------------------------------------------*/ Int32 BZ2_decompress ( DState* s ) { @@ -276,7 +311,7 @@ Int32 BZ2_decompress ( DState* s ) s->currBlockNo++; if (s->verbosity >= 2) - VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo ); + VPrintf1 ( "\n [" Int32_FMT ": huff+mtf ", s->currBlockNo ); s->storedBlockCRC = 0; GET_UCHAR(BZ_X_BCRC_1, uc); @@ -341,8 +376,13 @@ Int32 BZ2_decompress ( DState* s ) /*--- Undo the MTF values for the selectors. ---*/ { +#ifdef __ORCAC__ + UChar pos[BZ_N_GROUPS] = { 0, 1, 2, 3, 4, 5 }; + UChar tmp, v; +#else UChar pos[BZ_N_GROUPS], tmp, v; for (v = 0; v < nGroups; v++) pos[v] = v; +#endif for (i = 0; i < nSelectors; i++) { v = s->selectorMtf[i]; @@ -435,14 +475,22 @@ Int32 BZ2_decompress ( DState* s ) if (s->smallDecompress) while (es > 0) { if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); +#ifdef __ORCAC__ + *((UInt16 *)(s->ll16)+nblock) = (UInt16)uc; +#else s->ll16[nblock] = (UInt16)uc; +#endif nblock++; es--; } else while (es > 0) { if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); +#ifdef __ORCAC__ + *((UInt32 *)(s->tt)+nblock) = (UInt32)uc; +#else s->tt[nblock] = (UInt32)uc; +#endif nblock++; es--; }; @@ -509,8 +557,13 @@ Int32 BZ2_decompress ( DState* s ) s->unzftab[s->seqToUnseq[uc]]++; if (s->smallDecompress) +#ifdef __ORCAC__ + *((UInt16 *)(s->ll16)+nblock) = (UInt16)(s->seqToUnseq[uc]); else + *((UInt32 *)(s->tt)+nblock) = (UInt32)(s->seqToUnseq[uc]); +#else s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]); +#endif nblock++; GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym); @@ -542,7 +595,11 @@ Int32 BZ2_decompress ( DState* s ) /*-- compute the T vector --*/ for (i = 0; i < nblock; i++) { +#ifdef __ORCAC__ + uc = (UChar) *((UInt16 *)(s->ll16)+i); +#else uc = (UChar)(s->ll16[i]); +#endif SET_LL(i, s->cftabCopy[uc]); s->cftabCopy[uc]++; } @@ -572,12 +629,21 @@ Int32 BZ2_decompress ( DState* s ) /*-- compute the T^(-1) vector 
--*/ for (i = 0; i < nblock; i++) { +#ifdef __ORCAC__ + uc = (UChar)((*((UInt32 *)(s->tt)+i)) & 0xff); + *((UInt32 *)(s->tt)+(s->cftab[uc])) |= (i << 8); +#else uc = (UChar)(s->tt[i] & 0xff); s->tt[s->cftab[uc]] |= (i << 8); +#endif s->cftab[uc]++; } +#ifdef __ORCAC__ + s->tPos = (*((UInt32 *)(s->tt)+(s->origPtr))) >> 8; +#else s->tPos = s->tt[s->origPtr] >> 8; +#endif s->nblock_used = 0; if (s->blockRandomised) { BZ_RAND_INIT_MASK; diff --git a/dlltest.c b/dlltest.c deleted file mode 100644 index 2d7dcca..0000000 --- a/dlltest.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - minibz2 - libbz2.dll test program. - by Yoshioka Tsuneo(QWF00133@nifty.ne.jp/tsuneo-y@is.aist-nara.ac.jp) - This file is Public Domain. - welcome any email to me. - - usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename] -*/ - -#define BZ_IMPORT -#include -#include -#include "bzlib.h" -#ifdef _WIN32 -#include -#endif - - -#ifdef _WIN32 - -#define BZ2_LIBNAME "libbz2-1.0.2.DLL" - -#include -static int BZ2DLLLoaded = 0; -static HINSTANCE BZ2DLLhLib; -int BZ2DLLLoadLibrary(void) -{ - HINSTANCE hLib; - - if(BZ2DLLLoaded==1){return 0;} - hLib=LoadLibrary(BZ2_LIBNAME); - if(hLib == NULL){ - fprintf(stderr,"Can't load %s\n",BZ2_LIBNAME); - return -1; - } - BZ2_bzlibVersion=GetProcAddress(hLib,"BZ2_bzlibVersion"); - BZ2_bzopen=GetProcAddress(hLib,"BZ2_bzopen"); - BZ2_bzdopen=GetProcAddress(hLib,"BZ2_bzdopen"); - BZ2_bzread=GetProcAddress(hLib,"BZ2_bzread"); - BZ2_bzwrite=GetProcAddress(hLib,"BZ2_bzwrite"); - BZ2_bzflush=GetProcAddress(hLib,"BZ2_bzflush"); - BZ2_bzclose=GetProcAddress(hLib,"BZ2_bzclose"); - BZ2_bzerror=GetProcAddress(hLib,"BZ2_bzerror"); - - if (!BZ2_bzlibVersion || !BZ2_bzopen || !BZ2_bzdopen - || !BZ2_bzread || !BZ2_bzwrite || !BZ2_bzflush - || !BZ2_bzclose || !BZ2_bzerror) { - fprintf(stderr,"GetProcAddress failed.\n"); - return -1; - } - BZ2DLLLoaded=1; - BZ2DLLhLib=hLib; - return 0; - -} -int BZ2DLLFreeLibrary(void) -{ - if(BZ2DLLLoaded==0){return 0;} - FreeLibrary(BZ2DLLhLib); - 
BZ2DLLLoaded=0; -} -#endif /* WIN32 */ - -void usage(void) -{ - puts("usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]"); -} - -int main(int argc,char *argv[]) -{ - int decompress = 0; - int level = 9; - char *fn_r = NULL; - char *fn_w = NULL; - -#ifdef _WIN32 - if(BZ2DLLLoadLibrary()<0){ - fprintf(stderr,"Loading of %s failed. Giving up.\n", BZ2_LIBNAME); - exit(1); - } - printf("Loading of %s succeeded. Library version is %s.\n", - BZ2_LIBNAME, BZ2_bzlibVersion() ); -#endif - while(++argv,--argc){ - if(**argv =='-' || **argv=='/'){ - char *p; - - for(p=*argv+1;*p;p++){ - if(*p=='d'){ - decompress = 1; - }else if('1'<=*p && *p<='9'){ - level = *p - '0'; - }else{ - usage(); - exit(1); - } - } - }else{ - break; - } - } - if(argc>=1){ - fn_r = *argv; - argc--;argv++; - }else{ - fn_r = NULL; - } - if(argc>=1){ - fn_w = *argv; - argc--;argv++; - }else{ - fn_w = NULL; - } - { - int len; - char buff[0x1000]; - char mode[10]; - - if(decompress){ - BZFILE *BZ2fp_r = NULL; - FILE *fp_w = NULL; - - if(fn_w){ - if((fp_w = fopen(fn_w,"wb"))==NULL){ - printf("can't open [%s]\n",fn_w); - perror("reason:"); - exit(1); - } - }else{ - fp_w = stdout; - } - if((fn_r == NULL && (BZ2fp_r = BZ2_bzdopen(fileno(stdin),"rb"))==NULL) - || (fn_r != NULL && (BZ2fp_r = BZ2_bzopen(fn_r,"rb"))==NULL)){ - printf("can't bz2openstream\n"); - exit(1); - } - while((len=BZ2_bzread(BZ2fp_r,buff,0x1000))>0){ - fwrite(buff,1,len,fp_w); - } - BZ2_bzclose(BZ2fp_r); - if(fp_w != stdout) fclose(fp_w); - }else{ - BZFILE *BZ2fp_w = NULL; - FILE *fp_r = NULL; - - if(fn_r){ - if((fp_r = fopen(fn_r,"rb"))==NULL){ - printf("can't open [%s]\n",fn_r); - perror("reason:"); - exit(1); - } - }else{ - fp_r = stdin; - } - mode[0]='w'; - mode[1] = '0' + level; - mode[2] = '\0'; - - if((fn_w == NULL && (BZ2fp_w = BZ2_bzdopen(fileno(stdout),mode))==NULL) - || (fn_w !=NULL && (BZ2fp_w = BZ2_bzopen(fn_w,mode))==NULL)){ - printf("can't bz2openstream\n"); - exit(1); - } - while((len=fread(buff,1,0x1000,fp_r))>0){ - 
BZ2_bzwrite(BZ2fp_w,buff,len); - } - BZ2_bzclose(BZ2fp_w); - if(fp_r!=stdin)fclose(fp_r); - } - } -#ifdef _WIN32 - BZ2DLLFreeLibrary(); -#endif - return 0; -} diff --git a/dlltest.dsp b/dlltest.dsp deleted file mode 100644 index 4b1615e..0000000 --- a/dlltest.dsp +++ /dev/null @@ -1,93 +0,0 @@ -# Microsoft Developer Studio Project File - Name="dlltest" - Package Owner=<4> -# Microsoft Developer Studio Generated Build File, Format Version 5.00 -# ** •ÒW‚µ‚È‚¢‚Å‚­‚¾‚³‚¢ ** - -# TARGTYPE "Win32 (x86) Console Application" 0x0103 - -CFG=dlltest - Win32 Debug -!MESSAGE ‚±‚ê‚Í—LŒø‚ÈÒ²¸Ì§²Ù‚Å‚Í‚ ‚è‚Ü‚¹‚ñB ‚±‚ÌÌßÛ¼Þª¸Ä‚ðËÞÙÄÞ‚·‚邽‚ß‚É‚Í NMAKE ‚ðŽg—p‚µ‚Ä‚­‚¾‚³‚¢B -!MESSAGE [Ò²¸Ì§²Ù‚Ì´¸½Îß°Ä] ºÏÝÄÞ‚ðŽg—p‚µ‚ÄŽÀs‚µ‚Ä‚­‚¾‚³‚¢ -!MESSAGE -!MESSAGE NMAKE /f "dlltest.mak". -!MESSAGE -!MESSAGE NMAKE ‚ÌŽÀsŽž‚É\¬‚ðŽw’è‚Å‚«‚Ü‚· -!MESSAGE ºÏÝÄÞ ×²Ýã‚ÅϸۂÌÝ’è‚ð’è‹`‚µ‚Ü‚·B—á: -!MESSAGE -!MESSAGE NMAKE /f "dlltest.mak" CFG="dlltest - Win32 Debug" -!MESSAGE -!MESSAGE ‘I‘ð‰Â”\‚ÈËÞÙÄÞ Ó°ÄÞ: -!MESSAGE -!MESSAGE "dlltest - Win32 Release" ("Win32 (x86) Console Application" —p) -!MESSAGE "dlltest - Win32 Debug" ("Win32 (x86) Console Application" —p) -!MESSAGE - -# Begin Project -# PROP Scc_ProjName "" -# PROP Scc_LocalPath "" -CPP=cl.exe -RSC=rc.exe - -!IF "$(CFG)" == "dlltest - Win32 Release" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 0 -# PROP BASE Output_Dir "Release" -# PROP BASE Intermediate_Dir "Release" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 0 -# PROP Output_Dir "Release" -# PROP Intermediate_Dir "Release" -# PROP Ignore_Export_Lib 0 -# PROP Target_Dir "" -# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c -# ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c -# ADD BASE RSC /l 0x411 /d "NDEBUG" -# ADD RSC /l 0x411 /d "NDEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib 
winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 -# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 /out:"minibz2.exe" - -!ELSEIF "$(CFG)" == "dlltest - Win32 Debug" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 1 -# PROP BASE Output_Dir "dlltest_" -# PROP BASE Intermediate_Dir "dlltest_" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 1 -# PROP Output_Dir "dlltest_" -# PROP Intermediate_Dir "dlltest_" -# PROP Ignore_Export_Lib 0 -# PROP Target_Dir "" -# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c -# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c -# ADD BASE RSC /l 0x411 /d "_DEBUG" -# ADD RSC /l 0x411 /d "_DEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept -# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /out:"minibz2.exe" /pdbtype:sept - -!ENDIF - -# Begin Target - -# Name "dlltest - Win32 Release" -# Name "dlltest - Win32 Debug" -# Begin Source File - -SOURCE=.\bzlib.h -# End Source File -# Begin Source File - -SOURCE=.\dlltest.c -# End Source File -# End Target -# End Project diff --git a/huffman.c b/huffman.c index 293095c..e45c5f6 100644 --- a/huffman.c +++ b/huffman.c @@ -4,6 +4,11 @@ /*--- huffman.c ---*/ /*-------------------------------------------------------------*/ 
+/*-- Modified for use under GNO by Stephen Heumann --*/ +#ifdef __ORCAC__ +segment "bzip2"; +#endif + /*-- This file is a part of bzip2 and/or libbzip2, a program and library for lossless, block-sorting data compression. @@ -112,9 +117,15 @@ void BZ2_hbMakeCodeLengths ( UChar *len, Int32 nNodes, nHeap, n1, n2, i, j, k; Bool tooLong; +#ifdef __ORCAC__ + static Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ]; + static Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ]; + static Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; +#else Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ]; Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ]; Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; +#endif for (i = 0; i < alphaSize; i++) weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; diff --git a/libbz2.def b/libbz2.def deleted file mode 100644 index 2dc0dd8..0000000 --- a/libbz2.def +++ /dev/null @@ -1,27 +0,0 @@ -LIBRARY LIBBZ2 -DESCRIPTION "libbzip2: library for data compression" -EXPORTS - BZ2_bzCompressInit - BZ2_bzCompress - BZ2_bzCompressEnd - BZ2_bzDecompressInit - BZ2_bzDecompress - BZ2_bzDecompressEnd - BZ2_bzReadOpen - BZ2_bzReadClose - BZ2_bzReadGetUnused - BZ2_bzRead - BZ2_bzWriteOpen - BZ2_bzWrite - BZ2_bzWriteClose - BZ2_bzWriteClose64 - BZ2_bzBuffToBuffCompress - BZ2_bzBuffToBuffDecompress - BZ2_bzlibVersion - BZ2_bzopen - BZ2_bzdopen - BZ2_bzread - BZ2_bzwrite - BZ2_bzflush - BZ2_bzclose - BZ2_bzerror diff --git a/libbz2.dsp b/libbz2.dsp deleted file mode 100644 index a21a20f..0000000 --- a/libbz2.dsp +++ /dev/null @@ -1,130 +0,0 @@ -# Microsoft Developer Studio Project File - Name="libbz2" - Package Owner=<4> -# Microsoft Developer Studio Generated Build File, Format Version 5.00 -# ** •ÒW‚µ‚È‚¢‚Å‚­‚¾‚³‚¢ ** - -# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102 - -CFG=libbz2 - Win32 Debug -!MESSAGE ‚±‚ê‚Í—LŒø‚ÈÒ²¸Ì§²Ù‚Å‚Í‚ ‚è‚Ü‚¹‚ñB ‚±‚ÌÌßÛ¼Þª¸Ä‚ðËÞÙÄÞ‚·‚邽‚ß‚É‚Í NMAKE ‚ðŽg—p‚µ‚Ä‚­‚¾‚³‚¢B -!MESSAGE [Ò²¸Ì§²Ù‚Ì´¸½Îß°Ä] ºÏÝÄÞ‚ðŽg—p‚µ‚ÄŽÀs‚µ‚Ä‚­‚¾‚³‚¢ -!MESSAGE -!MESSAGE NMAKE /f "libbz2.mak". 
-!MESSAGE -!MESSAGE NMAKE ‚ÌŽÀsŽž‚É\¬‚ðŽw’è‚Å‚«‚Ü‚· -!MESSAGE ºÏÝÄÞ ×²Ýã‚ÅϸۂÌÝ’è‚ð’è‹`‚µ‚Ü‚·B—á: -!MESSAGE -!MESSAGE NMAKE /f "libbz2.mak" CFG="libbz2 - Win32 Debug" -!MESSAGE -!MESSAGE ‘I‘ð‰Â”\‚ÈËÞÙÄÞ Ó°ÄÞ: -!MESSAGE -!MESSAGE "libbz2 - Win32 Release" ("Win32 (x86) Dynamic-Link Library" —p) -!MESSAGE "libbz2 - Win32 Debug" ("Win32 (x86) Dynamic-Link Library" —p) -!MESSAGE - -# Begin Project -# PROP Scc_ProjName "" -# PROP Scc_LocalPath "" -CPP=cl.exe -MTL=midl.exe -RSC=rc.exe - -!IF "$(CFG)" == "libbz2 - Win32 Release" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 0 -# PROP BASE Output_Dir "Release" -# PROP BASE Intermediate_Dir "Release" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 0 -# PROP Output_Dir "Release" -# PROP Intermediate_Dir "Release" -# PROP Ignore_Export_Lib 0 -# PROP Target_Dir "" -# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /FD /c -# ADD CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /FD /c -# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /o NUL /win32 -# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /o NUL /win32 -# ADD BASE RSC /l 0x411 /d "NDEBUG" -# ADD RSC /l 0x411 /d "NDEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386 -# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386 /out:"libbz2.dll" - -!ELSEIF "$(CFG)" == "libbz2 - Win32 Debug" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 1 -# PROP BASE Output_Dir "Debug" -# PROP BASE Intermediate_Dir "Debug" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 1 -# PROP Output_Dir "Debug" -# 
PROP Intermediate_Dir "Debug" -# PROP Ignore_Export_Lib 0 -# PROP Target_Dir "" -# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /FD /c -# ADD CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /FD /c -# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /o NUL /win32 -# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /o NUL /win32 -# ADD BASE RSC /l 0x411 /d "_DEBUG" -# ADD RSC /l 0x411 /d "_DEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 /pdbtype:sept -# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 /out:"libbz2.dll" /pdbtype:sept - -!ENDIF - -# Begin Target - -# Name "libbz2 - Win32 Release" -# Name "libbz2 - Win32 Debug" -# Begin Source File - -SOURCE=.\blocksort.c -# End Source File -# Begin Source File - -SOURCE=.\bzlib.c -# End Source File -# Begin Source File - -SOURCE=.\bzlib.h -# End Source File -# Begin Source File - -SOURCE=.\bzlib_private.h -# End Source File -# Begin Source File - -SOURCE=.\compress.c -# End Source File -# Begin Source File - -SOURCE=.\crctable.c -# End Source File -# Begin Source File - -SOURCE=.\decompress.c -# End Source File -# Begin Source File - -SOURCE=.\huffman.c -# End Source File -# Begin Source File - -SOURCE=.\libbz2.def -# End Source File -# Begin Source File - -SOURCE=.\randtable.c -# End Source File -# End Target -# End Project diff --git a/makefile.msc b/makefile.msc deleted file mode 100644 index 799a18a..0000000 --- a/makefile.msc +++ /dev/null @@ -1,63 +0,0 @@ -# Makefile for Microsoft Visual C++ 6.0 -# usage: nmake -f makefile.msc -# K.M. 
Syring (syring@gsf.de) -# Fixed up by JRS for bzip2-0.9.5d release. - -CC=cl -CFLAGS= -DWIN32 -MD -Ox -D_FILE_OFFSET_BITS=64 -nologo - -OBJS= blocksort.obj \ - huffman.obj \ - crctable.obj \ - randtable.obj \ - compress.obj \ - decompress.obj \ - bzlib.obj - -all: lib bzip2 test - -bzip2: lib - $(CC) $(CFLAGS) -o bzip2 bzip2.c libbz2.lib setargv.obj - $(CC) $(CFLAGS) -o bzip2recover bzip2recover.c - -lib: $(OBJS) - lib /out:libbz2.lib $(OBJS) - -test: bzip2 - type words1 - .\\bzip2 -1 < sample1.ref > sample1.rb2 - .\\bzip2 -2 < sample2.ref > sample2.rb2 - .\\bzip2 -3 < sample3.ref > sample3.rb2 - .\\bzip2 -d < sample1.bz2 > sample1.tst - .\\bzip2 -d < sample2.bz2 > sample2.tst - .\\bzip2 -ds < sample3.bz2 > sample3.tst - @echo All six of the fc's should find no differences. - @echo If fc finds an error on sample3.bz2, this could be - @echo because WinZip's 'TAR file smart CR/LF conversion' - @echo is too clever for its own good. Disable this option. - @echo The correct size for sample3.ref is 120,244. If it - @echo is 150,251, WinZip has messed it up. - fc sample1.bz2 sample1.rb2 - fc sample2.bz2 sample2.rb2 - fc sample3.bz2 sample3.rb2 - fc sample1.tst sample1.ref - fc sample2.tst sample2.ref - fc sample3.tst sample3.ref - - - -clean: - del *.obj - del libbz2.lib - del bzip2.exe - del bzip2recover.exe - del sample1.rb2 - del sample2.rb2 - del sample3.rb2 - del sample1.tst - del sample2.tst - del sample3.tst - -.c.obj: - $(CC) $(CFLAGS) -c $*.c -o $*.obj - diff --git a/manual.html b/manual.html deleted file mode 100644 index 3218979..0000000 --- a/manual.html +++ /dev/null @@ -1,117 +0,0 @@ - - - - - -Untitled Document: Untitled Document - - - - - - - - - - - - - - - - - -
[Top][Contents][Index][ ? ]
-

Untitled Document

- -The following text is the License for this software. You should -find it identical to that contained in the file LICENSE in the -source distribution. -

- -@bf{------------------ START OF THE LICENSE ------------------} -

- -This program, bzip2, -and associated library libbzip2, are -Copyright (C) 1996-2002 Julian R Seward. All rights reserved. -

- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -

    -
  • - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. -
  • - The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment in the product - documentation would be appreciated but is not required. -
  • - Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. -
  • - The name of the author may not be used to endorse or promote - products derived from this software without specific prior written - permission. -
-THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE -GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -

- -Julian Seward, Cambridge, UK. -

- -jseward@acm.org -

- -bzip2/libbzip2 version 1.0.2 of 30 December 2001. -

- -@bf{------------------ END OF THE LICENSE ------------------} -

- -Web sites: -

- -http://sources.redhat.com/bzip2 -

- -http://www.cacheprof.org -

- -PATENTS: To the best of my knowledge, bzip2 does not use any patented -algorithms. However, I do not have the resources available to carry out -a full patent search. Therefore I cannot give any guarantee of the -above statement. -

- -


-
- -This document was generated -by Julian Seward on January, 5 2002 -using texi2html - - - diff --git a/manual.pdf b/manual.pdf deleted file mode 100644 index 9be5d6d..0000000 Binary files a/manual.pdf and /dev/null differ diff --git a/manual.ps b/manual.ps deleted file mode 100644 index 41d1a7e..0000000 --- a/manual.ps +++ /dev/null @@ -1,3991 +0,0 @@ -%!PS-Adobe-2.0 -%%Creator: dvips(k) 5.86 Copyright 1999 Radical Eye Software -%%Title: manual.dvi -%%Pages: 40 -%%PageOrder: Ascend -%%BoundingBox: 0 0 596 842 -%%EndComments -%DVIPSWebPage: (www.radicaleye.com) -%DVIPSCommandLine: dvips -o manual.ps manual.dvi -%DVIPSParameters: dpi=600, compressed -%DVIPSSource: TeX output 2002.01.05:0052 -%%BeginProcSet: texc.pro -%! -/TeXDict 300 dict def TeXDict begin/N{def}def/B{bind def}N/S{exch}N/X{S -N}B/A{dup}B/TR{translate}N/isls false N/vsize 11 72 mul N/hsize 8.5 72 -mul N/landplus90{false}def/@rigin{isls{[0 landplus90{1 -1}{-1 1}ifelse 0 -0 0]concat}if 72 Resolution div 72 VResolution div neg scale isls{ -landplus90{VResolution 72 div vsize mul 0 exch}{Resolution -72 div hsize -mul 0}ifelse TR}if Resolution VResolution vsize -72 div 1 add mul TR[ -matrix currentmatrix{A A round sub abs 0.00001 lt{round}if}forall round -exch round exch]setmatrix}N/@landscape{/isls true N}B/@manualfeed{ -statusdict/manualfeed true put}B/@copies{/#copies X}B/FMat[1 0 0 -1 0 0] -N/FBB[0 0 0 0]N/nn 0 N/IEn 0 N/ctr 0 N/df-tail{/nn 8 dict N nn begin -/FontType 3 N/FontMatrix fntrx N/FontBBox FBB N string/base X array -/BitMaps X/BuildChar{CharBuilder}N/Encoding IEn N end A{/foo setfont}2 -array copy cvx N load 0 nn put/ctr 0 N[}B/sf 0 N/df{/sf 1 N/fntrx FMat N -df-tail}B/dfs{div/sf X/fntrx[sf 0 0 sf neg 0 0]N df-tail}B/E{pop nn A -definefont setfont}B/Cw{Cd A length 5 sub get}B/Ch{Cd A length 4 sub get -}B/Cx{128 Cd A length 3 sub get sub}B/Cy{Cd A length 2 sub get 127 sub} -B/Cdx{Cd A length 1 sub get}B/Ci{Cd A type/stringtype ne{ctr get/ctr ctr -1 add N}if}B/id 0 N/rw 0 N/rc 0 N/gp 0 N/cp 0 
N/G 0 N/CharBuilder{save 3 -1 roll S A/base get 2 index get S/BitMaps get S get/Cd X pop/ctr 0 N Cdx -0 Cx Cy Ch sub Cx Cw add Cy setcachedevice Cw Ch true[1 0 0 -1 -.1 Cx -sub Cy .1 sub]/id Ci N/rw Cw 7 add 8 idiv string N/rc 0 N/gp 0 N/cp 0 N{ -rc 0 ne{rc 1 sub/rc X rw}{G}ifelse}imagemask restore}B/G{{id gp get/gp -gp 1 add N A 18 mod S 18 idiv pl S get exec}loop}B/adv{cp add/cp X}B -/chg{rw cp id gp 4 index getinterval putinterval A gp add/gp X adv}B/nd{ -/cp 0 N rw exit}B/lsh{rw cp 2 copy get A 0 eq{pop 1}{A 255 eq{pop 254}{ -A A add 255 and S 1 and or}ifelse}ifelse put 1 adv}B/rsh{rw cp 2 copy -get A 0 eq{pop 128}{A 255 eq{pop 127}{A 2 idiv S 128 and or}ifelse} -ifelse put 1 adv}B/clr{rw cp 2 index string putinterval adv}B/set{rw cp -fillstr 0 4 index getinterval putinterval adv}B/fillstr 18 string 0 1 17 -{2 copy 255 put pop}for N/pl[{adv 1 chg}{adv 1 chg nd}{1 add chg}{1 add -chg nd}{adv lsh}{adv lsh nd}{adv rsh}{adv rsh nd}{1 add adv}{/rc X nd}{ -1 add set}{1 add clr}{adv 2 chg}{adv 2 chg nd}{pop nd}]A{bind pop} -forall N/D{/cc X A type/stringtype ne{]}if nn/base get cc ctr put nn -/BitMaps get S ctr S sf 1 ne{A A length 1 sub A 2 index S get sf div put -}if put/ctr ctr 1 add N}B/I{cc 1 add D}B/bop{userdict/bop-hook known{ -bop-hook}if/SI save N @rigin 0 0 moveto/V matrix currentmatrix A 1 get A -mul exch 0 get A mul add .99 lt{/QV}{/RV}ifelse load def pop pop}N/eop{ -SI restore userdict/eop-hook known{eop-hook}if showpage}N/@start{ -userdict/start-hook known{start-hook}if pop/VResolution X/Resolution X -1000 div/DVImag X/IEn 256 array N 2 string 0 1 255{IEn S A 360 add 36 4 -index cvrs cvn put}for pop 65781.76 div/vsize X 65781.76 div/hsize X}N -/p{show}N/RMat[1 0 0 -1 0 0]N/BDot 260 string N/Rx 0 N/Ry 0 N/V{}B/RV/v{ -/Ry X/Rx X V}B statusdict begin/product where{pop false[(Display)(NeXT) -(LaserWriter 16/600)]{A length product length le{A length product exch 0 -exch getinterval eq{pop true exit}if}{pop}ifelse}forall}{false}ifelse -end{{gsave TR -.1 .1 TR 
1 1 scale Rx Ry false RMat{BDot}imagemask -grestore}}{{gsave TR -.1 .1 TR Rx Ry scale 1 1 false RMat{BDot} -imagemask grestore}}ifelse B/QV{gsave newpath transform round exch round -exch itransform moveto Rx 0 rlineto 0 Ry neg rlineto Rx neg 0 rlineto -fill grestore}B/a{moveto}B/delta 0 N/tail{A/delta X 0 rmoveto}B/M{S p -delta add tail}B/b{S p tail}B/c{-4 M}B/d{-3 M}B/e{-2 M}B/f{-1 M}B/g{0 M} -B/h{1 M}B/i{2 M}B/j{3 M}B/k{4 M}B/w{0 rmoveto}B/l{p -4 w}B/m{p -3 w}B/n{ -p -2 w}B/o{p -1 w}B/q{p 1 w}B/r{p 2 w}B/s{p 3 w}B/t{p 4 w}B/x{0 S -rmoveto}B/y{3 2 roll p a}B/bos{/SS save N}B/eos{SS restore}B end - -%%EndProcSet -TeXDict begin 39158280 55380996 1000 600 600 (manual.dvi) -@start -%DVIPSBitmapFont: Fa cmbxti10 14.4 1 -/Fa 1 47 df<13FCEA03FF000F13804813C05AA25AA2B5FCA31480A214006C5A6C5A6C5A -EA0FE0121271912B>46 D E -%EndDVIPSBitmapFont -%DVIPSBitmapFont: Fb cmti10 10.95 26 -/Fb 26 122 df37 D44 D<120FEA3FC0127FA212FFA31380EA7F00123C0A0A77891C>46 -D<15FE913807FF8091381F07C091387C01F0ECF000494813F8494813780107147C495A49 -C7FC167E133E137EA25BA2485AA2000315FEA25B000715FCA2491301120FA34848EB03F8 -A44848EB07F0A448C7EA0FE0A316C0007E141F12FE1680153FA2481500A2157EA25DA25D -4813015D6C495A127C4A5A4A5A6C49C7FC143E6C5B380FC1F03803FFC0C648C8FC273F76 -BC2E>48 D<15031507150F151F151E153E157EEC01FEEC03FC1407141FEB01FF90380FFB -F8EB1FC3EB0E07130015F0A2140FA215E0A2141FA215C0A2143FA21580A2147FA21500A2 -5CA25CA21301A25CA21303A25CA21307A25CA2130FA25CA2131FA25CEB7FE0B612F0A215 -E0203D77BC2E>I59 D<147E49B47E903907C1C38090391F80EF -C090383F00FF017E137F4914804848133F485AA248481400120F5B001F5C157E485AA215 -FE007F5C90C7FCA21401485C5AA21403EDF0385AA21407EDE078020F1370127C021F13F0 -007E013F13E0003E137FECF3E1261F01E313C03A0F8781E3803A03FF00FF00D800FC133E -252977A72E>97 DII -II<167C4BB4FC923807C78092380F83C0ED1F87161FED3F3FA2157E -A21780EE0E004BC7FCA414015DA414035DA30103B512F8A390260007E0C7FCA3140F5DA5 -141F5DA4143F92C8FCA45C147EA414FE5CA413015CA4495AA4495AA4495A121E127F5C12 
-FF49C9FCA2EAFE1EEAF83C1270EA7878EA3FE0EA0F802A5383BF1C>I -104 D<1478EB01FCA21303A314F8EB00E01400AD137C48B4FC38038F80EA0707000E13C0 -121E121CEA3C0F1238A2EA781F00701380A2EAF03F140012005B137E13FE5BA212015BA2 -12035B1438120713E0000F1378EBC070A214F0EB80E0A2EB81C01383148038078700EA03 -FEEA00F8163E79BC1C>I108 DIII<903903E001F890390FF807FE90 -3A1E7C1E0F80903A1C3E3C07C0013C137801389038E003E0EB783F017001C013F0ED8001 -9038F07F0001E015F8147E1603000113FEA2C75AA20101140717F05CA20103140F17E05C -A20107EC1FC0A24A1480163F010F15005E167E5E131F4B5A6E485A4B5A90393FB80F80DA -9C1FC7FCEC0FFCEC03E049C9FCA2137EA213FEA25BA21201A25BA21203A2387FFFE0B5FC -A22D3A80A72E>I -114 DII<137C48B4141C26 -038F80137EEA0707000E7F001E15FE121CD83C0F5C12381501EA781F007001805BA2D8F0 -3F1303140000005D5B017E1307A201FE5C5B150F1201495CA2151F0003EDC1C0491481A2 -153F1683EE0380A2ED7F07000102FF13005C01F8EBDF0F00009038079F0E90397C0F0F1C -90391FFC07F8903907F001F02A2979A731>I<017CEB01C048B4EB07F038038F80EA0707 -000E01C013F8121E001C1403EA3C0F0038EC01F0A2D8781F130000705BA2EAF03F91C712 -E012005B017E130116C013FE5B1503000115805BA2ED07001203495B150EA25DA25D1578 -000114706D5B0000495A6D485AD97E0FC7FCEB1FFEEB03F0252979A72A>I<017C167048 -B491387001FC3A038F8001F8EA0707000E01C015FE001E1403001CEDF000EA3C0F003817 -7C1507D8781F4A133C00701380A2D8F03F130F020049133812005B017E011F14784C1370 -13FE5B033F14F0000192C712E05BA2170100034A14C049137E17031880A2EF070015FE17 -0E00010101141E01F86D131C0000D9039F5BD9FC076D5A903A3E0F07C1E0903A1FFC03FF -C0902703F0007FC7FC372979A73C>I<137C48B4143826038F8013FCEA0707000E7F001E -1401001C15F8EA3C0F12381503D8781F14F000701380A2D8F03F1307020013E012005B01 -7E130F16C013FE5B151F1201491480A2153F000315005BA25D157EA315FE5D00011301EB -F8030000130790387C1FF8EB3FF9EB07E1EB00035DA21407000E5CEA3F80007F495AA24A -5AD8FF0090C7FC143E007C137E00705B387801F0383803E0381E0FC06CB4C8FCEA03F826 -3B79A72C>121 D E -%EndDVIPSBitmapFont -%DVIPSBitmapFont: Fc cmtt12 14.4 10 -/Fc 10 123 df50 D<383FFF805AB57EA37E7EEA003FAEED07FC92383FFF 
-8092B512E002C314F802CF8002DF8091B7FCDBF80F1380DBC00113C092C713E04A143F4A -EC1FF04A15F84A140F4AEC07FCA217034A15FE1701A318FF83A95F18FEA280170318FC6E -140718F86E140FEF1FF06E143F6EEC7FE06EECFFC0DBC0031380EDF01F92B6120002DF14 -FC02CF5C02C35CD91F8114C090260F807F90C7FC90C7EA0FF8384A7FC83E>98 -D<923803FFF85D4B7FA38181ED0003AEEC1FF0ECFFFE0103EBFF83010F14E34914F3017F -14FB90B7FC48EBF80F48EBC00191C7FC4848143F4848141F5B4848140F491407123F4914 -03127F5BA312FF90C8FCA97F127FA216077F123F6D140FA26C6C141F6D143F000F157F6C -6C14FF01FF5B6C6D5A6CD9F01FEBFFFC6C90B500FB13FE6D02F313FF6D14E3010F14C36D -020113FE010101FC14FC9026003FE0C8FC384A7CC83E>100 D<143E147F4A7E497FA56D -5B6EC8FC143E91C9FCAC003FB57E5A81A47EC7123FB3B3007FB71280B812C0A56C16802A -4A76C93E>105 D<007FB512C0B6FC81A47EC7121FB3B3B3A5007FB712F8B812FCA56C16 -F82E4978C83E>108 D111 -DI<903901FFF00F011F9038 -FE1F8090B612BF000315FF5A5A5A393FFE003F01F01307D87FC0130190C8FC5A48157FA4 -7EEE3F00D87FC091C7FC13F0EA3FFE381FFFF06CEBFFC06C14FE6C6E7EC615E0013F14F8 -010780D9003F7F02007F03071380030013C0003EED3FE0007F151F48150F17F06D1407A3 -7FA26D140F6D15E0161F01FCEC3FC06D14FF9026FFC00F138091B612005E485D013F5C6D -14E0D8FC0714802778007FF8C7FC2C3677B43E>115 D<147C14FC497EAD003FB712FC5A -B87EA36C5EA2260001FEC9FCB3A6173FA2EF7F80A76E14FF6D16006F5A9238C007FE9138 -7FF01F92B55A6E5C6E5C6E5C6E1480020149C7FC9138003FF031437DC13E>I<000FB812 -804817C04817E0A418C001C0C712014C13804C1300EE1FFE4C5AEE7FF06C484A5A4B5BC8 -485B4B90C7FC4B5A4B5A4B5A4B5A4B5A4A5B4A5B4A90C8FC4A5A4A5A4A5A4A5A4A5A495B -495B4990C9FC495A495A4948EC0FC0495A4948EC1FE0485B00075B4890C8FCEA1FFC485A -485A90B8FCB9FCA46C17C07E33337CB23E>122 D E -%EndDVIPSBitmapFont -%DVIPSBitmapFont: Fd cmtt12 13.14 31 -/Fd 31 123 df<903801FFC0010F13F8013F13FF4914C048B67E48814815FC4801007FD8 -1FF8130F01E0EB03FF48480100138049EC7FC0007F153F90C8121F4816E0160F17F07F16 -07A36CC8FC123EC9FCA2160F17E0A2161F17C0163FA2EE7F80EEFF00A24B5A4B5A15074B -5A4B5A4B5A4B5A4B5A020390C7FC4A5A4A5A4A5A4A5AEC7FC049485A4990C8FC495A495A 
-495AEB3FE0EBFFC04849EB03E04890C7EA07F0485A485AEA1FF048B7FC5AB8FCA37E6C16 -E02C447AC338>50 D<003FB6FC4815E0B712F882826C816C16802701FC000113C0923800 -7FE0161FEE0FF0A2160717F81603A6160717F0A2160FEE1FE0163FEE7FC0923801FF8003 -0F130090B65A5E16F08216FEEEFF8017C001FCC7EA7FE0EE1FF0EE07F8160317FCEE01FE -1600A217FF177FA717FF17FE16011603EE07FC160FEE3FF8EEFFF0003FB7FC4816E0B812 -C01780EEFE006C15F86C15C030437DC238>66 DI<007FB512F8B7FC16C08216F8826C813A03F8001FFF15070301 -13806F13C0167FEE3FE0161FEE0FF0A2EE07F8A2EE03FCA21601A217FE1600A4177FAD17 -FF17FEA4160117FCA2160317F81607A2EE0FF0161FEE3FE0167FEEFFC04B13805D031F13 -00007FB65AB75A5E5E16C093C7FC6C14F830437DC238>I<007FB712FCB87EA57ED801FC -C8FCA9177C94C7FCA6ED07C04B7EA590B6FCA79038FC000FA56F5A92C9FCA7EF0F80EF1F -C0AA007FB8FCB9FCA56C178032437DC238>I<91391FF003C091397FFC07E049B5FC0107 -14CF4914EF4914FF5B90387FF81F9038FFE00748EB800191C7FC4848147F485A49143F48 -5A161F485AA249140F123F5BA2127F90C8EA07C093C7FCA35A5AAA923807FFFC4B13FE4B -13FF7E7E6F13FE6F13FC9238000FE07F003F151FA27F121F7F163F6C7EA26C6C147F7F6C -6C14FF6C6C5B6E5A6C6D5A90387FF81F6DB6FC6D14EF6D14CF6D148F0101140F903A007F -FC07C0DA1FF0C7FC30457CC338>71 D<007FB612F0B712F8A56C15F0260001FCC7FCB3B3 -B1007FB612F0B712F8A56C15F0254377C238>73 D<90380FFFFE90B612E0000315F84881 -48814881A2273FFE000F138001F01301497F49147F4848EC3FC0A290C8121FA44816E0A2 -48150FB3AC6C151FA36C16C0A36D143FA36C6CEC7F806D14FF6D5B01FE130F6CB71200A2 -6C5D6C5D6C5DC615E0010F49C7FC2B457AC338>79 D<003FB512F04814FEB77E16E0826C -816C813A01FC003FFEED07FF03017F81707E163F161F83160FA7161F5F163F167F4C5A5D -030790C7FCED3FFE90B65A5E5E5EA282829038FC001FED07FC6F7E150115008282AA18E0 -EF01F0EF03F8A31783EE3F87263FFFE0ECC7F0486D14FFB56C7F18E07013C06C496D1380 -6C496D1300CA12FC35447EC238>82 D<003FB8FC481780B9FCA53BFE0007F0003FA9007C -EE1F00C792C7FCB3B3A70107B512F04980A56D5C31437DC238>84 -D<267FFFF090387FFFF0B56C90B512F8A56C496D13F0D801FCC73801FC00B3B3A66D1403 -00005EA36D14076D5D6E130F6D6C495A6E133F6D6C495A6D6C495AECFF076D90B5C7FC6D 
-5C6D5C6D5C023F13E0020F1380DA03FEC8FC35447FC238>I87 -D<001FB712F04816F85AA417F090C8121F17E0EE3FC0167F1780EEFF00A24B5A4B5A123E -C8485A4B5AA24B5A4B5AA24B5A4BC7FCA24A5A14035D4A5A140F5D4A5A143F5D4A5A14FF -92C8FC495A13035C495AA2495A495AA2495A495A17F849C7EA01FC485AA2485A485AA248 -5A121F5B485A127F90B7FCB8FCA56C16F82E437BC238>90 D<003FB712804816C0B812E0 -A46C16C06C16802B087A7D38>95 D97 DIIIII<14F0497E497E497EA4 -6D5A6D5A6D5A91C8FCAB383FFFFC487FB5FCA37E7EC7FCB3AF007FB612F0B712F816FCA3 -16F86C15F0264476C338>105 D<387FFFFEB6FCA57EC77EB3B3B1007FB7FCB81280A56C -1600294379C238>108 D<023FEB07E03B3FE0FFC01FF8D87FF39038E07FFCD8FFF76D48 -7E90B500F97F15FB6C91B612806C01C1EBF83F00030100EBE01F4902C013C0A24990387F -800FA2491400A349137EB3A73C3FFF03FFE07FFC4801879038F0FFFEB500C76D13FFA36C -01874913FE6C01039038E07FFC383080AF38>IIII114 D<903907FF80F0017FEBF1F848B5 -12FD000714FF5A5A5AEBFC00D87FE0131F0180130F48C71207481403A5007FEC01F001C0 -90C7FCEA3FF013FE381FFFF86CEBFFC0000314F8C614FF013F1480010714E0D9003F13F0 -020013F8ED0FFC1503003CEC01FE007E140000FE15FF167F7EA37F6D14FF16FE01F01303 -6DEB07FC01FF137F91B512F816F016E04815C0D8FC3F1400010F13FCD8780113E0283278 -B038>III<000FB712FC4816FE5AA417 -FC0180C7EA1FF8EE3FF0EE7FE0EEFFC04B13804B13006CC7485AC8485A4B5A4B5A4B5A4B -5A4A5B4A90C7FCEC07FC4A5A4A5A4A5A4A5A49485A4990C8FC495A495A495A495A494814 -7C494814FE485B4890C8FC485A485A485A485A48B7FCB8FCA56C16FC2F2F7DAE38>122 -D E -%EndDVIPSBitmapFont -%DVIPSBitmapFont: Fe cmbx12 13.14 53 -/Fe 53 122 df<923803FFE092B512FC020714FF021F81027F9038007FC0DAFFF0EB0FE0 -010301C0804990C7EA3FF84948147F4A81494814FF495AA2137F5CA2715A715A715AEF07 -8094C8FCA8EF07FCB9FCA526007FF0C7123F171FB3B3003FB5D8E00FB512F8A53D4D7ECC -44>12 D45 DI<177817F8EE01FCA21603A2EE07F8A217F0160FA217 -E0161FA2EE3FC0A21780167FA217005EA24B5AA25E1503A24B5AA25E150FA25E151FA24B -5AA25E157FA24BC7FCA25D1401A25D1403A24A5AA25D140FA24A5AA25D143FA25D147FA2 -4AC8FCA25C1301A25C1303A2495AA25C130FA2495AA25C133FA25C137FA249C9FCA25B12 
-01A2485AA25B1207A25B120FA2485AA25B123FA25B127FA248CAFCA25AA2127CA22E6D79 -D13D>I<15F014011407141F147FEB03FF137FB6FCA313FC1380C7FCB3B3B2007FB712E0 -A52B4777C63D>49 DIIIII<121F7F7F13FE90B812E0A45A18C0188018005F5FA25F485E90C8EA07E0 -007E4B5A5F007C151F4CC7FC167E5E485D15014B5A4B5AC8485A4B5AA24BC8FC157EA25D -1401A24A5A1407A24A5AA2141FA24A5AA2147FA314FFA3495BA45BA55BAA6D5BA26D90C9 -FCEB007C334B79C93D>III65 -D<93261FFF80EB01C00307B500F81303033F02FE13074AB7EA800F0207EEE01F021F903A -FE007FF83F027F01E0903807FC7F91B5C73801FEFF010301FCEC007F4901F0814901C015 -0F4949814990C97E494882495A48498248197F5C48193F5C48191F5C48190FA2485BA21A -075AA391CDFCA2B5FCAD7EA280F207C0A27EA36C7F1A0F6C1A80806C191F6E18006C6180 -6C197E6C6D177C6D6C17FC6D6C4C5A6D6D4B5A6D6D4B5A6D01F0ED1FC06D01FC4B5A0100 -01FF03FFC7FC6E01E0EB07FE021F01FEEB3FFC020790B612F0020116C0DA003F92C8FC03 -0714F8DB001F13804A4D79CB59>67 DIII<93261FFF80EB01C00307B500F81303 -033F02FE13074AB7EA800F0207EEE01F021F903AFE007FF83F027F01E0903807FC7F91B5 -C73801FEFF010301FCEC007F4901F0814901C0150F4949814990C97E494882495A484982 -48197F5C48193F5C48191F5C48190FA2485BA21A075AA391CEFCA2B5FCAD7E050FB712C0 -80A37E94C7001FEBC000807EA27E807E807E806C7F7E6D7E6D7E6D7F6D01E05D6D6D5D6D -13FC010001FF4AB5FC6E01E0EB07F9021F01FFEB3FF0020791B5EAE07F0201EEC01FDA00 -3FED0007030702F81301DB001F018090C8FC524D79CB60>II< -B712FEA5D8000FEBE000B3B3B3ABB712FEA5274B7DCA2E>I76 DIIII82 DI<003FBB12C0A5DA80019038FC001FD9FC001601D87FF0943800 -7FE001C0183F49181F90C7170FA2007E1907A3007C1903A500FC1AF0481901A5C894C7FC -B3B3A749B812FCA54C4A7CC955>III89 -D97 DI<91380FFF8091B512F8010314FF010F158090263FFE0013C0 -D97FF8EB1FE0D9FFE0EB3FF04849EB7FF8484913FF4890C7FC5A5B121F5B003FED7FF0EE -3FE0007FED1FC093C7FC5BA212FFAC127F7FA2123FA26D153E121F6D157E6C167C6C6D14 -FC6C16F86C6D13036C01F0EB07F0D97FFCEB1FE06DB4EBFFC0010F90B5120001035C0100 -14F0020F13802F337CB137>IIIIII<13FCEA03FF487F487FA2487FA66C -5BA26C5B6C90C7FCEA00FC90C8FCACEB7FC0B5FCA512037EB3B3B61280A5194D7BCC22> -I108 D<90287FC001FFE0EC7F 
-F8B5010F01FC0103B5FC033F6D010F804B6D6C4814E0DBFE079026C03F817F9126C3F001 -9138FC007F0003D9C7E0DAE1F8806CDA8000D9F1E06D7E02CFC7EBF3C002DE91267FF780 -131F02FC03FFC77FA24A5DA24A5DA34A5DB3A9B6D8C03FB5D8F00FB512FCA55E317BB067 ->I<903A7FC001FFC0B5010F13F8033F13FE4B7F9126C1FE077F9126C3F0037F00039038 -C7C0016CD9CF8080150002DE7F02FC81A25CA25CA35CB3A9B6D8C07FEBFFE0A53B317BB0 -44>I<913807FF80027F13F80103B6FC010F15C090261FFE017F903A7FF0003FF8D9FFC0 -EB0FFC48496D7E4890C76C7E4817804980000F17C048486E13E0A2003F17F049157FA200 -7F17F8A400FF17FCAB007F17F8A36C6CEDFFF0A3001F17E06D5C000F17C06C6C4A13806C -17006C6D495A6C01E0EB1FFC6D6C495A903A3FFE01FFF0010FB612C0010392C7FCD9007F -13F80207138036337DB13D>I<90397FC00FFFB5017F13F002C1B512FC02C714FF9126CF -F80F7F9126FFC00313E0000391C77F6C01FC6E7E4A6E7E5C4A6E7E848319808319C0A471 -13E0AC4D13C0A319805FA219004D5A804D5A6E4A5A6E4A5A02FF495BDB80075B9126EFF0 -1F5B02E7B548C7FC02E314F802E014E0DB0FFEC8FC92CAFCAFB612C0A53B467CB044>I< -9039FF803FE0B5EBFFF8028113FE02837FDA87E11380EC8F830003D99F0713C06C139E14 -BCA214F8A24A6C13806F13004A6C5A93C7FCA45CB3A7B612E0A52A317CB032>114 -D<90390FFF8070017FEBF1F048B6FC1207380FFC01391FE0003F4848130F491307127F90 -C712035A1501A27FA213E06D90C7FC13FE387FFFF0ECFFC015F06C14FC6C14FF6C15806C -15C06C15E0C615F0013F14F8010714FCEB007F14019138003FFE150F0078140700F81403 -A26C1401A37E16FC6C14036D14F87F6DEB07F001F8EB1FE001FFEBFFC091B51280D8FC7F -1400D8F81F13FCD8E00313C027337CB130>I<14F8A61301A41303A21307A2130FA2131F -133F137F13FF1203000F90B512F0B7FCA426007FF8C7FCB3A6167CAA013F14F880A29039 -1FFE01F0010F1303903907FF87E06DEBFFC06D14806D6C1300EC0FFC26467EC430>I -III<007FB500C090387FFFE0A5C601F0C73803F8006E5D -017F5E6E1407013F5E6E140F011F5E6E141FA26D6D91C7FC5F6D153E6F137E6D157C6F13 -FC6D5DEDF0016D5DEDF803027F5C15FC1607DA3FFE5B160F021F5CEDFF1F6E91C8FC16BF -6E13BE16FE6E5BA36E5BA26E5BA26F5AA26F5AA26F5AA393C9FC5D153E157E157CD81F80 -13FC486C5B387FE001D8FFF05B14035D14074A5A49485A007F49CAFCEBC07E383F81FC6C -B45A6C5B6C13C0C648CBFC3B467EAF41>121 D E 
-%EndDVIPSBitmapFont -%DVIPSBitmapFont: Ff cmtt12 17.28 6 -/Ff 6 123 df<913803FFC0023F13FC49B67E010715F04981013F15FE498190B812C048 -8348D9FC0180489026E0001F7F480180130391C87F48486F7E49153F4848ED0FFF834848 -178083496F13C012FF8319E07FA2187FA36C5A6C5A6C5ACBFCA218FFA219C05FA219805F -A24D13005F604D5A173F4D5A4D5AA24C5B4C5B4C5B041F90C7FC4C5A4C5A4C5A4B5B4B5B -4B5B031F5B4B90C8FC4B5AEDFFF84A5B4A5B4A5B021F5B4A90C9FCEC7FFC4A5A495B495B -010F5B495B4948CAFC4948ED1F804948ED3FC04849ED7FE0485B000F5B4890C9FC4890B8 -FC5ABAFCA56C18C06C18803B5A79D94A>50 D<383FFFF0487F80B5FCA37EA27EEA000FB0 -EE0FFC93B57E030714E0031F14F84B14FE92B7FC02FD8291B87E85DCE01F7FEE000703FC -01017F4B6D7F03E0143F4B6E7E4B140F8592C87E4A6F1380A34A6F13C0A284A21AE0A219 -7FAA19FFA21AC0A26E5DA24E138080606F1600606F4A5A6F143F6F4A5A6F4A5A6F130303 -FF010F5BDCC03F5B93B65A6102FD93C7FC02FC5D6F5C031F14F0902607F80714C0902603 -F00191C8FC90C8EA3FF043597FD74A>98 D105 D<003FB512FE4880B77EA57E7EC71201B3B3B3 -B0003FB812FC4817FEBAFCA56C17FE6C17FC385877D74A>108 D -112 D<000FB912E04818F04818F8A619F001F0C8000313E04D13C04D13804D13004D5A4D -5A4D5A6C484A5B6C484A5BC9000F5B4C5B4C90C7FC4C5A4C5A4B5B4B5B4B5B4B5B4B5B4B -90C8FC4B5A4B5A4A5B4A5B4A5B4A5B4A5B4A90C9FC4A5A4A5A495B495B495B4949EC07E0 -4949EC0FF04948C8EA1FF8495A495A485B485B485B485B4890C9FC485A48B9FCBAFCA66C -18F06C18E03D3E7BBD4A>122 D E -%EndDVIPSBitmapFont -%DVIPSBitmapFont: Fg cmbx12 17.28 28 -/Fg 28 120 df<16F04B7E1507151F153FEC01FF1407147F010FB5FCB7FCA41487EBF007 -C7FCB3B3B3B3007FB91280A6395E74DD51>49 D<913801FFF8021FEBFFC091B612F80103 -15FF010F16C0013F8290267FFC0114F89027FFE0003F7F4890C7000F7F48486E7FD807F8 -6E148048486E14C048486E14E048486F13F001FC17F8486C816D17FC6E80B56C16FE8380 -A219FFA283A36C5BA26C5B6C90C8FCD807FC5DEA01F0CA14FEA34D13FCA219F85F19F04D -13E0A294B512C019804C14004C5B604C5B4C5B604C13804C90C7FC4C5A4C5A4B13F05F4B -13804B90C8FC4B5AED1FF84B5A4B5A4B48143F4A5B4A48C8FC4A5A4A48157E4A5A4A5AEC -7F8092C9FC02FE16FE495A495A4948ED01FCD90FC0150749B8FC5B5B90B9FC5A4818F85A 
-5A5A5A5ABAFCA219F0A4405E78DD51>I<92B5FC020F14F8023F14FF49B712C04916F001 -0FD9C01F13FC90271FFC00077FD93FE001017F49486D8049C86C7F484883486C6F7F14C0 -486D826E806E82487FA4805CA36C5E4A5E6C5B6C5B6C495E011FC85A90C95CA294B55A61 -4C91C7FC604C5B4C5B4C5B4C5B047F138092260FFFFEC8FC020FB512F817E094C9FC17F8 -17FF91C7003F13E0040713F8040113FE707F717F7113E085717FA2717F85A285831A80A3 -1AC0EA03FCEA0FFF487F487F487FA2B57EA31A80A34D14005C7E4A5E5F6C495E49C8485B -D81FF85F000F5ED807FE92B55A6C6C6C4914806C01F0010791C7FC6C9026FF803F5B6D90 -B65A011F16F0010716C001014BC8FCD9001F14F0020149C9FC426079DD51>II<4DB5ED03C0057F02F0 -14070407B600FE140F047FDBFFC0131F4BB800F0133F030F05FC137F033F9127F8007FFE -13FF92B6C73807FF814A02F0020113C3020702C09138007FE74A91C9001FB5FC023F01FC -16074A01F08291B54882490280824991CB7E49498449498449498449865D49498490B5FC -484A84A2484A84A24891CD127FA25A4A1A3F5AA348491A1FA44899C7FCA25CA3B5FCB07E -A380A27EA2F50FC0A26C7FA37E6E1A1F6C1D80A26C801D3F6C6E1A00A26C6E616D1BFE6D -7F6F4E5A7F6D6D4E5A6D6D4E5A6D6D4E5A6D6E171F6D02E04D5A6E6DEFFF806E01FC4C90 -C7FC020F01FFEE07FE6E02C0ED1FF8020102F8ED7FF06E02FF913803FFE0033F02F8013F -1380030F91B648C8FC030117F86F6C16E004071680DC007F02F8C9FC050191CAFC626677 -E375>67 D72 DI77 -D80 D<001FBEFCA64849C79126E0000F148002E0180091 -C8171F498601F81A0349864986A2491B7FA2491B3F007F1DC090C9181FA4007E1C0FA600 -FE1DE0481C07A5CA95C7FCB3B3B3A3021FBAFCA663617AE070>84 -D<913803FFFE027FEBFFF00103B612FE010F6F7E4916E090273FFE001F7FD97FE001077F -D9FFF801017F486D6D7F717E486D6E7F85717FA2717FA36C496E7FA26C5B6D5AEB1FC090 -C9FCA74BB6FC157F0207B7FC147F49B61207010F14C0013FEBFE004913F048B512C04891 -C7FC485B4813F85A5C485B5A5CA2B55AA45FA25F806C5E806C047D7F6EEB01F96C6DD903 -F1EBFF806C01FED90FE114FF6C9027FFC07FC01580000191B5487E6C6C4B7E011F02FC13 -0F010302F001011400D9001F90CBFC49437CC14E>97 D<903807FF80B6FCA6C6FC7F7FB3 -A8EFFFF8040FEBFF80047F14F00381B612FC038715FF038F010014C0DBBFF0011F7FDBFF -C001077F93C76C7F4B02007F03F8824B6F7E4B6F13804B17C0851BE0A27313F0A21BF8A3 
-7313FCA41BFEAE1BFCA44F13F8A31BF0A24F13E0A24F13C06F17804F1300816F4B5A6F4A -5B4AB402075B4A6C6C495B9126F83FE0013F13C09127F00FFC03B55A4A6CB648C7FCDAC0 -0115F84A6C15E091C7001F91C8FC90C8000313E04F657BE35A>I<92380FFFF04AB67E02 -0F15F0023F15FC91B77E01039039FE001FFF4901F8010113804901E0010713C049018049 -13E0017F90C7FC49484A13F0A2485B485B5A5C5A7113E0485B7113C048701380943800FE -0095C7FC485BA4B5FCAE7EA280A27EA2806C18FCA26C6D150119F87E6C6D15036EED07F0 -6C18E06C6D150F6D6DEC1FC06D01E0EC7F806D6DECFF00010701FCEB03FE6D9039FFC03F -FC010091B512F0023F5D020F1580020102FCC7FCDA000F13C03E437BC148>II<92380FFFC0 -4AB512FC020FECFF80023F15E091B712F80103D9FE037F499039F0007FFF011F01C0011F -7F49496D7F4990C76C7F49486E7F48498048844A804884485B727E5A5C48717EA35A5C72 -1380A2B5FCA391B9FCA41A0002C0CBFCA67EA380A27EA27E6E160FF11F806C183F6C7FF1 -7F006C7F6C6D16FE6C17016D6C4B5A6D6D4A5A6D01E04A5A6D6DEC3FE0010301FC49B45A -6D9026FFC01F90C7FC6D6C90B55A021F15F8020715E0020092C8FC030713F041437CC14A ->III<903807FF80B6FCA6C6FC7F7FB3A8EF1FFF94B512F0040714 -FC041F14FF4C8193267FE07F7F922781FE001F7FDB83F86D7FDB87F07FDB8FC0814C7F03 -9FC78015BE03BC8003FC825DA25DA25DA45DB3B2B7D8F007B71280A651647BE35A>II<903807FF80B6 -FCA6C6FC7F7FB3B3B3B3ADB712E0A623647BE32C>108 D<902607FF80D91FFFEEFFF8B6 -91B500F00207EBFF80040702FC023F14E0041F02FF91B612F84C6F488193267FE07F6D48 -01037F922781FE001F9027E00FF0007FC6DA83F86D9026F01FC06D7F6DD987F06D4A487F -6DD98FC0DBF87EC7804C6D027C80039FC76E488203BEEEFDF003BC6E4A8003FC04FF834B -5FA24B5FA24B94C8FCA44B5EB3B2B7D8F007B7D8803FB612FCA67E417BC087>I<902607 -FF80EB1FFFB691B512F0040714FC041F14FF4C8193267FE07F7F922781FE001F7FC6DA83 -F86D7F6DD987F07F6DD98FC0814C7F039FC78015BE03BC8003FC825DA25DA25DA45DB3B2 -B7D8F007B71280A651417BC05A>I<923807FFE092B6FC020715E0021F15F8027F15FE49 -4848C66C6C7E010701F0010F13E04901C001037F49496D7F4990C87F49486F7E49486F7E -48496F13804819C04A814819E048496F13F0A24819F8A348496F13FCA34819FEA4B518FF -AD6C19FEA46C6D4B13FCA36C19F8A26C6D4B13F0A26C19E06C6D4B13C0A26C6D4B13806C 
-6D4B13006D6C4B5A6D6D495B6D6D495B010701F0010F13E06D01FE017F5B010090B7C7FC -023F15FC020715E0020092C8FC030713E048437CC151>I114 D<913A3FFF8007800107B5EAF81F011FECFE7F017F91B5FC48B8FC48EBE0 -014890C7121FD80FFC1407D81FF0801600485A007F167F49153FA212FF171FA27F7F7F6D -92C7FC13FF14E014FF6C14F8EDFFC06C15FC16FF6C16C06C16F06C826C826C826C82013F -1680010F16C01303D9007F15E0020315F0EC001F1500041F13F81607007C150100FC8117 -7F6C163FA2171F7EA26D16F0A27F173F6D16E06D157F6D16C001FEEDFF806D0203130002 -C0EB0FFE02FCEB7FFC01DFB65A010F5DD8FE0315C026F8007F49C7FC48010F13E035437B -C140>II<90 -2607FFC0ED3FFEB60207B5FCA6C6EE00076D826D82B3B3A260A360A2607F60183E6D6D14 -7E4E7F6D6D4948806D6DD907F0ECFF806D01FFEB3FE06D91B55A6E1500021F5C020314F8 -DA003F018002F0C7FC51427BC05A>I119 D -E -%EndDVIPSBitmapFont -%DVIPSBitmapFont: Fh cmsy10 10.95 1 -/Fh 1 16 df15 -D E -%EndDVIPSBitmapFont -%DVIPSBitmapFont: Fi cmtt10 10.95 89 -/Fi 89 127 df<121C127FEAFF80B3EA7F00B2123EC7FCA8121C127FA2EAFF80A3EA7F00 -A2121C09396DB830>33 D<00101304007C131F00FEEB3F80A26C137FA248133FB2007E14 -00007C7F003C131E00101304191C75B830>I<903907C007C0A2496C487EA8011F131FA2 -02C05BA3007FB7FCA2B81280A36C16006C5D3A007F807F80A2020090C7FCA9495BA2003F -90B512FE4881B81280A36C1600A22701FC01FCC7FCA300031303A201F85BA76C486C5AA2 -29387DB730>I38 DI<141E147F14FF5BEB03FEEB07FCEB0FF0EB1FE0EB3FC0EB7F80EBFF00 -485A5B12035B485A120F5BA2485AA2123F5BA2127F90C7FCA412FEAD127FA47F123FA27F -121FA26C7EA27F12076C7E7F12017F6C7EEB7F80EB3FC0EB1FE0EB0FF0EB07FCEB03FEEB -01FF7F147F141E184771BE30>I<127812FE7E7F6C7E6C7EEA0FF06C7E6C7E6C7E6C7EEB -7F80133F14C0131FEB0FE014F01307A2EB03F8A214FC1301A214FE1300A4147FAD14FEA4 -130114FCA2130314F8A2EB07F0A2130F14E0EB1FC0133F1480137FEBFF00485A485A485A -485AEA3FE0485A485A90C7FC5A1278184778BE30>I<14E0497E497EA60038EC0380007E -EC0FC0D8FF83EB3FE001C3137F9038F3F9FF267FFBFB13C06CB61280000FECFE00000314 -F86C5C6C6C13C0011F90C7FC017F13C048B512F04880000F14FE003FECFF80267FFBFB13 
-C026FFF3F913E09038C3F87F0183133FD87E03EB0FC00038EC0380000091C7FCA66D5A6D -5A23277AAE30>I<143EA2147FAF007FB7FCA2B81280A36C1600A2C76CC8FCAF143EA229 -297DAF30>II<007FB612F0 -A2B712F8A36C15F0A225077B9E30>I<120FEA3FC0EA7FE0A2EAFFF0A4EA7FE0A2EA3FC0 -EA0F000C0C6E8B30>I<16F01501ED03F8A21507A2ED0FF0A2ED1FE0A2ED3FC0A2ED7F80 -A2EDFF00A24A5AA25D1403A24A5AA24A5AA24A5AA24A5AA24A5AA24AC7FCA2495AA25C13 -03A2495AA2495AA2495AA2495AA2495AA249C8FCA2485AA25B1203A2485AA2485AA2485A -A2485AA2485AA248C9FCA25AA2127CA225477BBE30>I<14FE903807FFC0497F013F13F8 -497F90B57E48EB83FF4848C6138049137F4848EB3FC04848EB1FE049130F001F15F04913 -07A24848EB03F8A290C712014815FCA400FEEC00FEAD6C14016C15FCA36D1303003F15F8 -A26D1307001F15F0A26D130F6C6CEB1FE0A26C6CEB3FC06C6CEB7F806D13FF2601FF8313 -006CEBFFFE6D5B6D5B010F13E06D5BD900FEC7FC273A7CB830>IIIII<000FB612804815C05AA316800180C8FCAEEB83FF019F13C0 -90B512F015FC8181D9FE0313809039F0007FC049133F0180EB1FE06CC7120F000E15F0C8 -1207A216F81503A31218127EA2B4FC150716F048140F6C15E06C141F6DEB3FC06D137F3A -3FE001FF80261FFC0F13006CB55A6C5C6C5C6C14E06C6C1380D90FFCC7FC25397BB730> -II<127CB712FC16FEA416FC48C7EA0FF816F0ED1FE0007CEC3FC0C8EA7F80EDFF -00A24A5A4A5A5D14075D140F5D4A5AA24A5AA24AC7FCA25C5C13015CA213035CA213075C -A4495AA6131F5CA96D5A6DC8FC273A7CB830>I<49B4FC011F13F0017F13FC90B57E0003 -ECFF804815C048010113E03A1FF8003FF049131FD83FC0EB07F8A24848EB03FC90C71201 -A56D1303003F15F86D13076C6CEB0FF06C6CEB1FE0D807FCEB7FC03A03FF83FF806C90B5 -12006C6C13FC011F13F0497F90B512FE48802607FE0013C0D80FF8EB3FE0D81FE0EB0FF0 -4848EB07F8491303007F15FC90C712014815FE481400A66C14016C15FC6D1303003F15F8 -6D1307D81FF0EB1FF06D133F3A0FFF01FFE06C90B512C06C1580C6ECFE006D5B011F13F0 -010190C7FC273A7CB830>I<49B4FC010F13E0013F13F890B57E4880488048010113803A -0FFC007FC0D81FF0EB3FE04848131F49EB0FF048481307A290C7EA03F85A4815FC1501A4 -16FEA37E7E6D1303A26C6C13076C6C130F6D133FD80FFC13FF6CB6FC7E6C14FE6C14F901 -3FEBE1FC010F138190380060011400ED03F8A2150716F0150F000F15E0486C131F486CEB 
-3FC0157FEDFF804A1300EC07FE391FF01FFC90B55A6C5C6C5C6C1480C649C7FCEB3FF027 -3A7CB830>I<120FEA3FC0EA7FE0A2EAFFF0A4EA7FE0A2EA3FC0EA0F00C7FCAF120FEA3F -C0EA7FE0A2EAFFF0A4EA7FE0A2EA3FC0EA0F000C276EA630>II<16F01503ED07F8151F157FEDFFF014034A13C0021F138091383FFE00ECFFF849 -5B010713C0495BD93FFEC7FC495A3801FFF0485B000F13804890C8FCEA7FFC5BEAFFE05B -7FEA7FF87FEA1FFF6C7F000313E06C7F38007FFC6D7E90380FFF806D7F010113F06D7FEC -3FFE91381FFF80020713C06E13F01400ED7FF8151F1507ED03F01500252F7BB230>I<00 -7FB7FCA2B81280A36C16006C5DCBFCA7003FB612FE4881B81280A36C1600A229157DA530 ->I<1278127EB4FC13C07FEA7FF813FEEA1FFF6C13C000037F6C13F86C6C7EEB1FFF6D7F -010313E06D7F9038007FFC6E7E91380FFF806E13C0020113F080ED3FF8151F153FEDFFF0 -5C020713C04A138091383FFE004A5A903801FFF0495B010F13804990C7FCEB7FFC48485A -4813E0000F5B4890C8FCEA7FFE13F8EAFFE05B90C9FC127E1278252F7BB230>I64 -D<147F4A7EA2497FA4497F14F7A401077F14E3A3010F7FA314C1A2011F7FA490383F80FE -A590387F007FA4498049133F90B6FCA34881A39038FC001F00038149130FA40007814913 -07A2D87FFFEB7FFFB56CB51280A46C496C130029397DB830>I<007FB512F0B612FE6F7E -82826C813A03F8001FF815076F7E1501A26F7EA615015EA24B5A1507ED1FF0ED7FE090B6 -5A5E4BC7FC6F7E16E0829039F8000FF8ED03FC6F7E1500167FA3EE3F80A6167F1700A25E -4B5A1503ED1FFC007FB6FCB75A5E16C05E6C02FCC7FC29387EB730>I<91387F803C9039 -03FFF03E49EBFC7E011F13FE49EBFFFE5B9038FFE07F48EB801F3903FE000F484813075B -48481303A2484813015B123F491300A2127F90C8FC167C16005A5AAC7E7EA2167C6D14FE -123FA27F121F6D13016C6C14FCA26C6CEB03F86D13076C6CEB0FF03901FF801F6C9038E0 -7FE06DB512C06D14806D1400010713FC6D13F09038007FC0273A7CB830>I<003FB512E0 -4814FCB67E6F7E6C816C813A03F8007FF0ED1FF8150F6F7E6F7E15016F7EA2EE7F80A216 -3F17C0161FA4EE0FE0AC161F17C0A3163F1780A2167F17005E4B5A15034B5A150F4B5AED -7FF0003FB65A485DB75A93C7FC6C14FC6C14E02B387FB730>I<007FB7FCB81280A47ED8 -03F8C7123FA8EE1F0093C7FCA4157C15FEA490B5FCA6EBF800A4157C92C8FCA5EE07C0EE -0FE0A9007FB7FCB8FCA46C16C02B387EB730>I<003FB712804816C0B8FCA27E7ED801FC 
-C7121FA8EE0F8093C7FCA5153E157FA490B6FCA69038FC007FA4153E92C8FCAE383FFFF8 -487FB5FCA27E6C5B2A387EB730>I<02FF13F00103EBC0F8010F13F1013F13FD4913FF90 -B6FC4813C1EC007F4848133F4848131F49130F485A491307121F5B123F491303A2127F90 -C7FC6F5A92C8FC5A5AA892B5FC4A14805CA26C7F6C6D1400ED03F8A27F003F1407A27F12 -1F6D130F120F7F6C6C131FA2D803FE133F6C6C137FECC1FF6C90B5FC7F6D13FB010F13F3 -0103EBC1F0010090C8FC293A7DB830>I<3B3FFF800FFFE0486D4813F0B56C4813F8A26C -496C13F06C496C13E0D803F8C7EAFE00B290B6FCA601F8C7FCB3A23B3FFF800FFFE0486D -4813F0B56C4813F8A26C496C13F06C496C13E02D387FB730>I<007FB6FCB71280A46C15 -00260007F0C7FCB3B3A8007FB6FCB71280A46C1500213879B730>I<49B512F04914F85B -A27F6D14F090C7EAFE00B3B3123C127EB4FCA24A5A1403EB8007397FF01FF86CB55A5D6C -5C00075C000149C7FC38003FF025397AB730>II<383FFFF8487FB57EA26C5B6C -5BD801FCC9FCB3B0EE0F80EE1FC0A9003FB7FC5AB8FCA27E6C16802A387EB730>III< -90383FFFE048B512FC000714FF4815804815C04815E0EBF80001E0133FD87F80EB0FF0A2 -90C71207A44815F8481403B3A96C1407A26C15F0A36D130FA26D131F6C6CEB3FE001F813 -FF90B6FC6C15C06C15806C1500000114FCD8003F13E0253A7BB830>I<007FB512F0B612 -FE6F7E16E0826C813903F8003FED0FFCED03FE15016F7EA2821780163FA6167F17005EA2 -4B5A1503ED0FFCED3FF890B6FC5E5E16804BC7FC15F001F8C9FCB0387FFFC0B57EA46C5B -29387EB730>I<90383FFFE048B512FC000714FF4815804815C04815E0EBF80001E0133F -4848EB1FF049130F90C71207A44815F8481403B3A8147E14FE6CEBFF076C15F0EC7F87A2 -EC3FC7018013CF9038C01FFFD83FE014E0EBF80F90B6FC6C15C06C15806C1500000114FC -D8003F7FEB00016E7EA21680157F16C0153F16E0151F16F0150FED07E025467BB830>I< -003FB57E4814F0B612FC15FF6C816C812603F8017F9138003FF0151F6F7E150715038215 -01A515035E1507150F4B5A153F4AB45A90B65A5E93C7FC5D8182D9F8007FED3FE0151F15 -0F821507A817F8EEF1FCA53A3FFF8003FB4801C0EBFFF8B56C7E17F06C496C13E06C49EB -7FC0C9EA1F002E397FB730>I<90390FF803C0D97FFF13E048B512C74814F74814FF5A38 -1FF80F383FE001497E4848137F90C7123F5A48141FA2150FA37EED07C06C91C7FC7F7FEA -3FF0EA1FFEEBFFF06C13FF6C14E0000114F86C80011F13FF01031480D9003F13C0140191 
-38007FE0151FED0FF0A2ED07F8A2007C140312FEA56C140716F07F6DEB0FE06D131F01F8 -EB3FC001FF13FF91B51280160000FD5CD8FC7F13F8D8F81F5BD878011380253A7BB830> -I<003FB712C04816E0B8FCA43AFE003F800FA8007CED07C0C791C7FCB3B1011FB5FC4980 -A46D91C7FC2B387EB730>I<3B7FFFC007FFFCB56C4813FEA46C496C13FCD803F8C7EA3F -80B3B16D147F00011600A36C6C14FE6D13016D5CEC800390393FE00FF890391FF83FF06D -B55A6D5C6D5C6D91C7FC9038007FFCEC1FF02F3980B730>III<3A3FFF01FFF84801837F02C77FA202835B6C0101 -5B3A01FC007F806D91C7FC00005C6D5BEB7F01EC81FCEB3F8314C3011F5B14E7010F5B14 -FF6D5BA26D5BA26D5BA26D90C8FCA4497FA2497FA2815B81EB0FE781EB1FC381EB3F8181 -EB7F0081497F49800001143F49800003141F49800007140FD87FFEEB7FFFB590B5128080 -A25C6C486D130029387DB730>II<001FB612FC4815FE5AA490C7EA03FCED07F816 -F0150FED1FE016C0153FED7F80003E1500C85A4A5A5D14034A5A5D140F4A5A5D143F4A5A -92C7FC5C495A5C1303495A5C130F495A5C133F495A91C8FC5B4848147C4914FE1203485A -5B120F485A5B123F485A90B6FCB7FCA46C15FC27387CB730>I<007FB5FCB61280A41500 -48C8FCB3B3B3A5B6FC1580A46C140019476DBE30>I<007FB5FCB61280A47EC7123FB3B3 -B3A5007FB5FCB6FCA46C140019477DBE30>93 D<1307EB1FC0EB7FF0497E000313FE000F -EBFF80003F14E0D87FFD13F039FFF07FF8EBC01FEB800F38FE0003007CEB01F00010EB00 -401D0E77B730>I<007FB612F0A2B712F8A36C15F0A225077B7D30>I97 DII<913801FFE0 -4A7F5CA28080EC0007AAEB03FE90381FFF874913E790B6FC5A5A481303380FFC00D81FF0 -133F49131F485A150F4848130790C7FCA25AA25AA87E6C140FA27F003F141F6D133F6C7E -6D137F390FF801FF2607FE07EBFFC06CB712E06C16F06C14F76D01C713E0011F010313C0 -D907FCC8FC2C397DB730>I<49B4FC010713E0011F13F8017F7F90B57E48804801811380 -3A07FC007FC04848133FD81FE0EB1FE0150F484814F0491307127F90C7FCED03F85A5AB7 -FCA516F048C9FC7E7EA27F003FEC01F06DEB03F86C7E6C7E6D1307D807FEEB1FF03A03FF -C07FE06C90B5FC6C15C0013F14806DEBFE00010713F8010013C0252A7CA830>IIII<14E0EB03F8A2497EA36D5AA2EB00E091C8FCA9381FFFF8487F5AA27E7EEA0001B3 -A9003FB612C04815E0B7FCA27E6C15C023397AB830>III<387FFFF8B57EA47EEA0001B3B3A8007FB612F0B712F8A46C15F025 
-387BB730>I<02FC137E3B7FC3FF01FF80D8FFEF01877F90B500CF7F15DF92B57E6C010F -13872607FE07EB03F801FC13FE9039F803FC01A201F013F8A301E013F0B3A23C7FFE0FFF -07FF80B548018F13C0A46C486C01071380322881A730>II<49B4FC010F13E0013F13F8497F90B57E0003ECFF8014013A07FC007FC04848EB3F -E0D81FE0EB0FF0A24848EB07F8491303007F15FC90C71201A300FEEC00FEA86C14016C15 -FCA26D1303003F15F86D13076D130F6C6CEB1FF06C6CEB3FE06D137F3A07FF01FFC06C90 -B512806C15006C6C13FC6D5B010F13E0010190C7FC272A7CA830>II<49B413F8010FEBC1FC013F13F14913FD48B6FC -5A481381390FFC007F49131F4848130F491307485A491303127F90C7FC15015A5AA77E7E -15037FA26C6C1307150F6C6C131F6C6C133F01FC137F3907FF01FF6C90B5FC6C14FD6C14 -F9013F13F1010F13C1903803FE0190C7FCAD92B512F84A14FCA46E14F82E3C7DA730>I< -ED07F83A3FFF803FFF486DB51280B512C302CF14C06C13DF6C9038FFFC3FD8001F13E092 -38801F809238000F004A90C7FC5C5C5CA25CA45CAF003FB512FC4880B7FCA26C5C6C5C2A -287EA730>I<90381FFC1E48B5129F000714FF5A5A5A387FF007EB800100FEC7FC4880A4 -6C143E007F91C7FC13E06CB4FC6C13FC6CEBFF806C14E0000114F86C6C7F01037F903800 -0FFF02001380007C147F00FEEC1FC0A2150F7EA27F151F6DEB3F806D137F9039FC03FF00 -90B6FC5D5D00FC14F0D8F83F13C026780FFEC7FC222A79A830>II -I<3B3FFFC07FFF80486DB512C0B515E0A26C16C06C496C13803B01F80003F000A26D1307 -00005DA26D130F017E5CA2017F131F6D5CA2EC803F011F91C7FCA26E5A010F137EA2ECE0 -FE01075BA214F101035BA3903801FBF0A314FF6D5BA36E5A6E5A2B277EA630>I<3B3FFF -C01FFFE0486D4813F0B515F8A26C16F06C496C13E0D807E0C7EA3F00A26D5C0003157EA5 -6D14FE00015DEC0F80EC1FC0EC3FE0A33A00FC7FF1F8A2147DA2ECFDF9017C5C14F8A301 -7E13FBA290393FF07FE0A3ECE03FA2011F5C90390F800F802D277FA630>I<3A3FFF81FF -FC4801C37FB580A26C5D6C01815BC648C66CC7FC137FEC80FE90383F81FC90381FC3F8EB -0FE3ECE7F06DB45A6D5B7F6D5B92C8FC147E147F5C497F81903803F7E0EB07E790380FE3 -F0ECC1F890381F81FC90383F80FE90387F007E017E137F01FE6D7E48486D7E267FFF80B5 -FCB500C1148014E3A214C16C0180140029277DA630>I<3B3FFFC07FFF80486DB512C0B5 -15E0A26C16C06C496C13803B01FC0003F000A2000014076D5C137E150F017F5C7F151FD9 
-1F805BA214C0010F49C7FCA214E00107137EA2EB03F0157C15FCEB01F85DA2EB00F9ECFD -F0147D147FA26E5AA36E5AA35DA2143F92C8FCA25C147EA2000F13FE486C5AEA3FC1EBC3 -F81387EB8FF0EBFFE06C5B5C6C90C9FC6C5AEA01F02B3C7EA630>I<001FB612FC4815FE -5AA316FC90C7EA0FF8ED1FF0ED3FE0ED7FC0EDFF80003E491300C7485A4A5A4A5A4A5A4A -5A4A5A4A5A4990C7FC495A495A495A495A495A495A4948133E4890C7127F485A485A485A -485A485A48B7FCB8FCA46C15FE28277DA630>II<127CA212FEB3B3B3AD127CA207476CBE30>II<01 -7C133848B4137C48EB80FE4813C14813C348EBEFFC397FEFFFF0D8FF8713E0010713C048 -6C1380D87C0113003838007C1F0C78B730>I E -%EndDVIPSBitmapFont -%DVIPSBitmapFont: Fj cmb10 10.95 87 -/Fj 87 125 df11 DII<913B07FF8003FF80027FD9E03F13E049B56CB512F8010702FB80 -011F0103B5EA01FE90263FF8019038F800FF90267FE003495A4948484948138002804A5A -48010014805D481600497FA26F6E13006F6E5AF0007896C7FCA6F17F80BBFCA50003D900 -01EB800384B3ABB5D8F83FD9FC3F13FEA547407EBF4C>I33 DI37 DII<147814F81301EB03F0EB07E0EB0FC0A2EB1F80133FEB7F00A213FE -A2485A1203A25B1207A2485AA3121F5BA2123FA4485AA612FFB3A2127FA66C7EA4121FA2 -7F120FA36C7EA212037FA212016C7EA2137FA2EB3F80131FEB0FC0A2EB07E0EB03F0EB01 -F813001478155A78C323>I<127012F87E127E7E6C7EA26C7E7F6C7EA26C7EA26C7E7FA2 -12007FA2EB7F80A314C0133FA214E0A4EB1FF0A614F8B3A214F0A6EB3FE0A414C0A2137F -1480A3EBFF00A25B1201A25B485AA2485AA2485A5B485AA248C7FC127E5A5A1270155A7B -C323>II44 -DIIII<143C147CEB01FC1307131FEA03FFB5FCA4EAFC -1F1200B3B3A8007FB6FCA5203C7ABB2D>IIII<000E1407D80F -80133F9038F801FF90B6FC5D5DA25D5D5D158092C7FC14FC14F00180C8FCA9EB87FE9038 -9FFFC090B512F0819038FC0FFC9038F007FE9038C003FF0180148016C0497EC714E0A416 -F0A21207EA1FC0EA3FE0EA7FF012FF13F8A316E013F0A26C484813C01380D87E0014806C -491300391FC00FFE390FF03FFC6CB55A6C5C6C14C06C91C7FCEB1FF8243D7CBB2D>II<121F7F13F090B612FCA45A16F816F016E0A216C016805A -007EC7EA3F00157E007C147C15FC4A5A4A5A485C14074A5AC75B141F4AC7FCA25C14FEA2 -1301A2495AA31307A2130FA25C131FA3133FA6137FAA6D5A6D5A6D5A263F7BBD2D>IIIII<007FB912E0BA12F0A4003F18E0CDFCB0003FB912E0BA12F0A46C -18E03C1C7BA447>61 
D63 D65 -DIIIIIIII<010FB61280A5D90001 -EBE000B3B3A4EA1FE0EA3FF0EA7FF8A2EAFFFCA44A5BA26C48485B01F091C7FC6C48485A -391FF03FFC6CB55A6C5C000114C026001FFCC8FC293F81BD2F>IIIII<913801FFC0021F13FC91B67E01 -0315E04901807F903A1FFE003FFCD93FF8EB0FFE49486D7E49486D7F48496D7F48834A7F -48834890C86C7EA2488349153FA2003F83A249151F007F83A400FF1880AE007F1800A36D -5DA2003F5FA36C6C4B5AA26C5F6E14FF6C5F6C6D495B6E5B6C5F6C6D495B6D6C4990C7FC -D93FFEEB3FFE6D6C6CB45A010790B512F06D5D01001580021F01FCC8FC020113C039407B -BE44>II<913801FFC0021F13FC91B67E010315E04901807F903A1FFE003FFCD93FF8EB -0FFE49486D7E49486D7F48496D7F48496D7FA24890C86C7E488349153F001F83A249151F -003F83A3007F834981A300FF1880AE007F1800A46D5D003F5FA36C6C4B5AA2000F027F5C -903AFF01FF807F6C4901E05B6C01876D485A15C16C9026CF80795B6C9026EF007F5B6DB4 -6D90C7FC6D5D6D01805B01079038C0FFF06D90B55A0100ED8001021FED03800201EBCFC0 -91C7EA0FE01807EFF81F94B5FCA3701400A4705BA2705B60705B715AEF1FC039517BBE44 ->III<003FB812FCA5D9FC03EBE01FD87FE0ED07FE01 -C01501018015001300007E177EA3007C173EA400FC173F48171FA5C71600B3AF013FB612 -FEA5383D7DBC3F>IIII<003FB5D8FC0F -B512F0A5D8003F90C7380FC0006D6D495A4DC7FC6D7F6D6D137E5F6D7F4C5A6D6D485A6D -13FC4C5ADA7FFE5B160F6E6C485A6E139F04BFC8FC6E13FEA26E5B6E5BA28082806F7EA2 -6F7E83A24B7F92B5FC83DA01FB7FEC03F303F17FEC07E1DA0FC07F707EEC1F804B6C7E5C -027E6D7F707F5C49486D7FA249486D7F49486D7FA249486D7F49486E7EA2B6011FB6FCA5 -403E7EBD45>II<003FB712F8A5DAC00313F09026FC000713E05B01E04913C05B49491380127F90 -C74813004B5A127E4B5AA24A5B127C4A5B5C5E4A5BC7FC4A5BA24A90C7FC5C5D4A5AA249 -5BA2495B5B5D4949137CA2495BA24990C7FC4915FC4A14F8495AA2485B1601485BA24849 -1303485B16074849130F161F4890C7123FEEFFF04848130FB8FCA52E3E7BBD38>II<0160130301F0EB07800001140F0003 -141FD807C0EB3E00A248485B48C75A001E5C003E1301003C5C007C1303A200785CA200F8 -1307485CA2D8F7E013BFD8FFF0EBFF8001F814C001FC14E001FE14F0A3007F7FA36C486C -13E06C486C13C06C48EB7F806C48EB3F00242077BE31>II97 D<13FFB5FCA512077EAFEC03FE 
-91381FFFC0027F13F091B57E9138FE0FFE9138F003FF4A7E02C014804A6C13C017E0A3EE -7FF0A317F8AC17F0A3EEFFE0A217C05D02C014806E4813006E485A9138FC1FFC01FCB55A -496C13E0D9F01F1380C7D807FCC7FC2D407EBE33>III<49B47E010F13F0013F7F90B512FE48EBC3 -FF48010013804848EB7FC04848133F001F15E05B003FEC1FF0A2485A150F16F8A212FFA2 -90B6FCA401F0C8FCA5127FA37F003F15F8A26C6C1301000F15F06D13036C6CEB07E06C90 -38800FC06C9038F07F806C6CB512006D5B010F13F8010013C0252B7EA92A>III<13FFB5FCA512077EAFED7F -E0913801FFF802077F4A7F91381FC3FFDA3E031380147CEC780102F014C014E0A214C0A3 -1480B3A4B5D8FE1F13FFA5303F7EBE33>III<13FFB5FCA512077EB092B512E0A592380FE000 -4B5A4B5A4BC7FC15FE4A5A4A5A4A5A4A5A4A5A143FECFFE0A28181A2ECDFFCEC8FFE140F -6E7E6E7FA26E7F6E7FA26F7E6F7EA26F7E6F7EB539FC7FFFF8A52D3F7FBE30>I<13FFB5 -FCA512077EB3B3AFB512FCA5163F7EBE19>I<01FFD97FE0EB3FF0B52601FFF8EBFFFC02 -07D9FC037F4A6D487F91281FC3FF0FE17FDA3E03D99F017F0007017C14BE6CD97801EBBC -0002F002F88002E05CA202C05CA302805CB3A4B5D8FE1FD9FF0FEBFF80A549297EA84C> -I<01FFEB7FE0B53801FFF802077F4A7F91381FC3FFDA3E0313800007137C6CEB780102F0 -14C014E0A214C0A31480B3A4B5D8FE1F13FFA530297EA833>I<49B47E010F13F0013F13 -FC90B6FC48018113803A03FE007FC04848EB3FE0000F15F049131F001F15F8A24848EB0F -FCA2007F15FEA400FF15FFAB007F15FEA3003F15FC6D131F001F15F8A26C6CEB3FF00007 -15E06C6CEB7FC03A01FF81FF806C90B51200013F13FC010F13F001011380282B7EA92D> -I<9038FF03FEB5381FFFC0027F13F091B57E9138FE1FFE9138F007FF0007497E6C01C014 -804A6C13C017E081A217F0A2167F17F8ACEEFFF0A317E05D17C0A26E4813806E4813006E -485A9138FC1FFC91B55A16E0029F1380DA87FCC7FC0280C8FCACB512FEA52D3B7EA833> -I<49B4131E010FEBC03E013FEBE07E90B5EAF0FE48EBC1F948EB807F48EB003F485A4848 -131FA2003F140F5B127F1507A2485AAC127F7FA2123F150F6C7E151F6C7E0007143F6C6C -13FF6C13C36C90B5FC6D13EF011F138F903803FE0F90C7FCAC0203B512F8A52D3B7DA830 ->I<3901FE07F800FFEB0FFE91383FFF804A13C0EC7C7F9138F8FFE03807FFF06C5B5CA2 -ED7FC09138803F80ED1F0092C7FCA291C8FCB3A3B6FCA523297FA827>I<90387FE1E038 
-03FFFB4813FF5A381FE07F383F801F387F000F007E130712FE1403A27EA26DC7FC13F013 -FF6C13F014FC6C13FF15806C14C07E6C14E0000114F06C7E010313F8EB003F140F007813 -0712F814037EA36C14F06C1307A29038800FE09038F03FC090B51280150000F813FC38E0 -3FF01D2B7DA924>I<131FA65BA55BA25BA25A5A5A001FEBFFC0B6FCA4000790C7FCB3EC -03E0A97EEC87C0A26CEBCF806C13FF6D1300EB1FFEEB07F81B3B7EB923>IIII<3B7FFFFC7FFFE0A5C69039E007E0004B5A -90387FF01FD93FF85B4BC7FC90381FFC7EEB0FFE5D6D6C5A7F5D6D5B7FA26E7E143F814A -7E14FF81497FEB03F301078002E17FEB0FC049487F6F7E90383F003F017E806F7EB500E0 -B512F8A52D287FA730>II<001FB61280A4D9F80F1300EBE01F01C05B49485A130048495A4A5AA2D83E01 -5B5B5D495BA2C64890C7FC5B5C495A017FEB0F8014F8EBFFF0A248EBE01F48150014C048 -1380A248495A485A5D48485B007F5B9038F80FFEB6FCA421287EA728>III E -%EndDVIPSBitmapFont -%DVIPSBitmapFont: Fk cmbx12 14.4 49 -/Fk 49 122 df12 D45 DI<913803FFC0023F13FC91B6FC010315 -C0010F018113F0903A1FFC003FF849486D7E49486D7E49486D7E48496D138048496D13C0 -A24817E04890C813F0A34817F8A24817FC49157FA3007F17FEA600FF17FFB3A5007F17FE -A6003F17FCA26D15FFA26C17F8A36C17F0A26C6D4913E0A26C6D4913C06C17806E5B6C6D -4913006D6C495AD91FFCEB3FF8903A0FFF81FFF06D90B55A01011580D9003F01FCC7FC02 -0313C0384F7BCD43>48 D<157815FC14031407141F14FF130F0007B5FCB6FCA2147F13F0 -EAF800C7FCB3B3B3A6007FB712FEA52F4E76CD43>I -I<91380FFFC091B512FC0107ECFF80011F15E090263FF8077F9026FF800113FC4848C76C -7ED803F86E7E491680D807FC8048B416C080486D15E0A4805CA36C17C06C5B6C90C75AD8 -01FC1680C9FC4C13005FA24C5A4B5B4B5B4B13C04B5BDBFFFEC7FC91B512F816E016FCEE -FF80DA000713E0030113F89238007FFE707E7013807013C018E07013F0A218F8A27013FC -A218FEA2EA03E0EA0FF8487E487E487EB57EA318FCA25E18F891C7FC6C17F0495C6C4816 -E001F04A13C06C484A1380D80FF84A13006CB44A5A6CD9F0075BC690B612F06D5D011F15 -80010302FCC7FCD9001F1380374F7ACD43>I<177C17FEA2160116031607160FA2161F16 -3F167FA216FF5D5DA25D5DED1FBFED3F3F153E157C15FCEC01F815F0EC03E01407EC0FC0 -1580EC1F005C147E147C5C1301495A495A5C495A131F49C7FC133E5B13FC485A5B485A12 
-07485A485A90C8FC123E127E5ABA12C0A5C96C48C7FCAF020FB712C0A53A4F7CCE43>I< -D80380150ED807E0157E01FEEC03FED9FFF0137F91B65A5F5F5F5F5F94C7FC5E5E16F016 -C093C8FC15F801E190C9FC01E0CAFCABEC0FFF027F13F001E3B512FE01E76E7E9026FFF8 -077FDAC0017F49C713F8496E7E49143F4981496E7E6C481680C9FC18C08218E0A418F0A3 -EA0FE0487E487E487E487EA418E0A35B6C484A13C05B491680003EC85A003F17006C6C4A -5A6D5D6C6C4A5AD807F8495BD803FE01075B2701FFC03F5B6C90B65A013F4AC7FC6D14F8 -010314C09026007FF8C8FC344F79CD43>II<121F7F7FEBFF8091B81280A45A1900606060A260 -6060485F0180C86CC7FC007EC95A4C5A007C4B5A5F4C5A160F4C5A484B5A4C5A94C8FC16 -FEC812014B5A5E4B5A150F4B5AA24B5AA24B5A15FFA24A90C9FCA25C5D1407A2140FA25D -141FA2143FA4147F5DA314FFA55BAC6D5BA2EC3FC06E5A395279D043>I<913807FFC002 -7F13FC0103B67E010F15E090261FFC0113F8903A3FE0003FFCD97F80EB0FFE49C76C7E48 -488048486E1380000717C04980120F18E0177FA2121F7FA27F7F6E14FF02E015C014F802 -FE4913806C7FDBC00313009238F007FE6C02F85B9238FE1FF86C9138FFBFF06CEDFFE017 -806C4BC7FC6D806D81010F15E06D81010115FC010781011F81491680EBFFE748018115C0 -48D9007F14E04848011F14F048487F48481303030014F8484880161F4848020713FC1601 -824848157F173FA2171FA2170FA218F8A27F007F17F06D151FA26C6CED3FE0001F17C06D -157F6C6CEDFF806C6C6C010313006C01E0EB0FFE6C01FCEBFFFC6C6CB612F06D5D010F15 -80010102FCC7FCD9000F13C0364F7ACD43>I<91B5FC010F14F8017F14FF90B712C00003 -D9C00F7F2707FC00017FD80FE06D7F48486E7E48C87FD87FE06E7E7F7F486C1680A66C5A -18006C485C6C5AC9485A5F4B5B4B5B4B5B4B5B4B90C7FC16FC4B5A4B5A16C04B5A93C8FC -4A5A5D14035D5D14075DA25D140FA25DAB91CAFCAAEC1FC04A7EECFFF8497FA2497FA76D -5BA26D5BEC3FE06E5A315479D340>63 D68 DII72 -DI<027FB71280A591C76C -90C7FCB3B3B3EA07F0EA1FFC487E487EA2B57EA44C5AA34A485B7E49495BD83FF8495BD8 -1FE05DD80FFC011F5B2707FF807F90C8FC000190B512FC6C6C14F0011F14C0010101F8C9 -FC39537DD145>I76 -DI80 D82 D<91260FFF80130791B500F85B010702FF5B011FEDC0 -3F49EDF07F9026FFFC006D5A4801E0EB0FFD4801800101B5FC4848C87E48488149150F00 -1F824981123F4981007F82A28412FF84A27FA26D82A27F7F6D93C7FC14C06C13F014FF15 
-F86CECFF8016FC6CEDFFC017F06C16FC6C16FF6C17C06C836C836D826D82010F82130301 -0082021F16801400030F15C0ED007F040714E01600173F050F13F08383A200788200F882 -A3187FA27EA219E07EA26CEFFFC0A27F6D4B13806D17006D5D01FC4B5A01FF4B5A02C04A -5A02F8EC7FF0903B1FFFC003FFE0486C90B65AD8FC0393C7FC48C66C14FC48010F14F048 -D9007F90C8FC3C5479D24B>I<003FBC1280A59126C0003F9038C0007F49C71607D87FF8 -060113C001E08449197F49193F90C8171FA2007E1A0FA3007C1A07A500FC1BE0481A03A6 -C994C7FCB3B3AC91B912F0A553517BD05E>II87 D97 DI<913801FFF8021FEBFF8091B612F00103 -15FC010F9038C00FFE903A1FFE0001FFD97FFC491380D9FFF05B4817C048495B5C5A485B -A2486F138091C7FC486F1300705A4892C8FC5BA312FFAD127F7FA27EA2EF03E06C7F1707 -6C6D15C07E6E140F6CEE1F806C6DEC3F006C6D147ED97FFE5C6D6CEB03F8010F9038E01F -F0010390B55A01001580023F49C7FC020113E033387CB63C>I<4DB47E0407B5FCA5EE00 -1F1707B3A4913801FFE0021F13FC91B6FC010315C7010F9038E03FE74990380007F7D97F -FC0101B5FC49487F4849143F484980485B83485B5A91C8FC5AA3485AA412FFAC127FA36C -7EA37EA26C7F5F6C6D5C7E6C6D5C6C6D49B5FC6D6C4914E0D93FFED90FEFEBFF80903A0F -FFC07FCF6D90B5128F0101ECFE0FD9003F13F8020301C049C7FC41547CD24B>I<913803 -FFC0023F13FC49B6FC010715C04901817F903A3FFC007FF849486D7E49486D7E4849130F -48496D7E48178048497F18C0488191C7FC4817E0A248815B18F0A212FFA490B8FCA318E0 -49CAFCA6127FA27F7EA218E06CEE01F06E14037E6C6DEC07E0A26C6DEC0FC06C6D141F6C -6DEC3F806D6CECFF00D91FFEEB03FE903A0FFFC03FF8010390B55A010015C0021F49C7FC -020113F034387CB63D>II -II<137F497E000313E0487FA2487FA76C5BA26C5BC613806DC7FC -90C8FCADEB3FF0B5FCA512017EB3B3A6B612E0A51B547BD325>I107 DIII<913801FFE002 -1F13FE91B612C0010315F0010F9038807FFC903A1FFC000FFED97FF86D6C7E49486D7F48 -496D7F48496D7F4A147F48834890C86C7EA24883A248486F7EA3007F1880A400FF18C0AC -007F1880A3003F18006D5DA26C5FA26C5F6E147F6C5F6C6D4A5A6C6D495B6C6D495B6D6C -495BD93FFE011F90C7FC903A0FFF807FFC6D90B55A010015C0023F91C8FC020113E03A38 -7CB643>I<903A3FF001FFE0B5010F13FE033FEBFFC092B612F002F301017F913AF7F800 
-7FFE0003D9FFE0EB1FFFC602806D7F92C76C7F4A824A6E7F4A6E7FA2717FA285187F85A4 -721380AC1A0060A36118FFA2615F616E4A5BA26E4A5B6E4A5B6F495B6F4990C7FC03F0EB -FFFC9126FBFE075B02F8B612E06F1480031F01FCC8FC030313C092CBFCB1B612F8A5414D -7BB54B>I<90397FE003FEB590380FFF80033F13E04B13F09238FE1FF89139E1F83FFC00 -03D9E3E013FEC6ECC07FECE78014EF150014EE02FEEB3FFC5CEE1FF8EE0FF04A90C7FCA5 -5CB3AAB612FCA52F367CB537>114 D<903903FFF00F013FEBFE1F90B7FC120348EB003F -D80FF81307D81FE0130148487F4980127F90C87EA24881A27FA27F01F091C7FC13FCEBFF -C06C13FF15F86C14FF16C06C15F06C816C816C81C681013F1580010F15C01300020714E0 -EC003F030713F015010078EC007F00F8153F161F7E160FA27E17E07E6D141F17C07F6DEC -3F8001F8EC7F0001FEEB01FE9039FFC00FFC6DB55AD8FC1F14E0D8F807148048C601F8C7 -FC2C387CB635>I<143EA6147EA414FEA21301A313031307A2130F131F133F13FF5A000F -90B6FCB8FCA426003FFEC8FCB3A9EE07C0AB011FEC0F8080A26DEC1F0015806DEBC03E6D -EBF0FC6DEBFFF86D6C5B021F5B020313802A4D7ECB34>IIII121 D E -%EndDVIPSBitmapFont -%DVIPSBitmapFont: Fl cmr10 10.95 51 -/Fl 51 122 df12 D<1430147014E0EB01C0EB038013 -07EB0F00131E133E133C5B13F85B12015B1203A2485AA2120F5BA2121F90C7FCA25AA312 -3E127EA6127C12FCB2127C127EA6123E123FA37EA27F120FA27F1207A26C7EA212017F12 -007F13787F133E131E7FEB07801303EB01C0EB00E014701430145A77C323>40 -D<12C07E12707E7E121E7E6C7E7F12036C7E7F12007F1378137CA27FA2133F7FA2148013 -0FA214C0A3130714E0A6130314F0B214E01307A614C0130FA31480A2131F1400A25B133E -A25BA2137813F85B12015B485A12075B48C7FC121E121C5A5A5A5A145A7BC323>I45 D<121EEA7F80A2EAFFC0A4EA7F80A2EA1E000A0A798919>I48 DIII<150E151E153EA2157EA215FE1401A21403EC077E1406140E141C -A214381470A214E0EB01C0A2EB0380EB0700A2130E5BA25B5BA25B5B1201485A90C7FC5A -120E120C121C5AA25A5AB8FCA3C8EAFE00AC4A7E49B6FCA3283E7EBD2D>I<00061403D8 -0780131F01F813FE90B5FC5D5D5D15C092C7FC14FCEB3FE090C9FCACEB01FE90380FFF80 -90383E03E090387001F8496C7E49137E497F90C713800006141FC813C0A216E0150FA316 -F0A3120C127F7F12FFA416E090C7121F12FC007015C012780038EC3F80123C6CEC7F0000 
-1F14FE6C6C485A6C6C485A3903F80FE0C6B55A013F90C7FCEB07F8243F7CBC2D>II<1238123C123F90B612FCA316F85A16F0 -16E00078C712010070EC03C0ED078016005D48141E151C153C5DC8127015F04A5A5D1403 -4A5A92C7FC5C141EA25CA2147C147814F8A213015C1303A31307A3130F5CA2131FA6133F -AA6D5A0107C8FC26407BBD2D>III<121EEA7F80A2EAFFC0 -A4EA7F80A2EA1E00C7FCB3121EEA7F80A2EAFFC0A4EA7F80A2EA1E000A2779A619>I67 DII72 DI<011FB512FCA3D9000713006E5A1401B3B3A6123FEA7F80EAFFC0A44A -5A1380D87F005B007C130700385C003C495A6C495A6C495A2603E07EC7FC3800FFF8EB3F -C026407CBD2F>I76 DII80 D83 D<003FB91280A3903AF0007FE001018090393FC0003F48C7ED1FC0007E1707127C -00781703A300701701A548EF00E0A5C81600B3B14B7E4B7E0107B612FEA33B3D7DBC42> -I89 D97 DI<49B4FC010F13E090383F00F8 -017C131E4848131F4848137F0007ECFF80485A5B121FA24848EB7F00151C007F91C7FCA2 -90C9FC5AAB6C7EA3003FEC01C07F001F140316806C6C13076C6C14000003140E6C6C131E -6C6C137890383F01F090380FFFC0D901FEC7FC222A7DA828>III -I<167C903903F801FF903A1FFF078F8090397E0FDE1F9038F803F83803F001A23B07E000 -FC0600000F6EC7FC49137E001F147FA8000F147E6D13FE00075C6C6C485AA23901F803E0 -3903FE0FC026071FFFC8FCEB03F80006CAFC120EA3120FA27F7F6CB512E015FE6C6E7E6C -15E06C810003813A0FC0001FFC48C7EA01FE003E140048157E825A82A46C5D007C153E00 -7E157E6C5D6C6C495A6C6C495AD803F0EB0FC0D800FE017FC7FC90383FFFFC010313C029 -3D7EA82D>III108 D<2701F801FE14FF00FF902707FFC00313E0913B1E07E00F03F0 -913B7803F03C01F80007903BE001F87000FC2603F9C06D487F000101805C01FBD900FF14 -7F91C75B13FF4992C7FCA2495CB3A6486C496CECFF80B5D8F87FD9FC3F13FEA347287DA7 -4C>I<3901F801FE00FF903807FFC091381E07E091387803F000079038E001F82603F9C0 -7F0001138001FB6D7E91C7FC13FF5BA25BB3A6486C497EB5D8F87F13FCA32E287DA733> -I<14FF010713E090381F81F890387E007E01F8131F4848EB0F804848EB07C04848EB03E0 -000F15F04848EB01F8A2003F15FCA248C812FEA44815FFA96C15FEA36C6CEB01FCA3001F -15F86C6CEB03F0A26C6CEB07E06C6CEB0FC06C6CEB1F80D8007EEB7E0090383F81FC9038 -0FFFF0010090C7FC282A7EA82D>I<3901FC03FC00FF90381FFF8091387C0FE09039FDE0 
-03F03A07FFC001FC6C496C7E6C90C7127F49EC3F805BEE1FC017E0A2EE0FF0A3EE07F8AA -EE0FF0A4EE1FE0A2EE3FC06D1580EE7F007F6E13FE9138C001F89039FDE007F09039FC78 -0FC0DA3FFFC7FCEC07F891C9FCAD487EB512F8A32D3A7EA733>I<3901F807E000FFEB1F -F8EC787CECE1FE3807F9C100031381EA01FB1401EC00FC01FF1330491300A35BB3A5487E -B512FEA31F287EA724>114 D<90383FC0603901FFF8E03807C03F381F000F003E130700 -3C1303127C0078130112F81400A27E7E7E6D1300EA7FF8EBFFC06C13F86C13FE6C7F6C14 -80000114C0D8003F13E0010313F0EB001FEC0FF800E01303A214017E1400A27E15F07E14 -016C14E06CEB03C0903880078039F3E01F0038E0FFFC38C01FE01D2A7DA824>I<131CA6 -133CA4137CA213FCA2120112031207001FB512C0B6FCA2D801FCC7FCB3A215E0A9120090 -38FE01C0A2EB7F03013F138090381F8700EB07FEEB01F81B397EB723>IIIIII E -%EndDVIPSBitmapFont -%DVIPSBitmapFont: Fm cmbx12 20.736 9 -/Fm 9 123 df<92380FFFE04AB67E020F15F0027F15FE49B87E4917E0010F17F8013F83 -49D9C01F14FF9027FFFC0001814801E06D6C80480180021F804890C86C8048486F804848 -6F8001FF6F804801C06E8002F081486D18806E816E18C0B5821BE06E81A37214F0A56C5B -A36C5B6C5B6C5B000313C0C690C9FC90CA15E060A34E14C0A21B80601B0060626295B55A -5F624D5C624D5C4D91C7FC614D5B4D13F04D5B6194B55A4C49C8FC4C5B4C5B4C13E04C5B -604C90C9FCEE7FFC4C5A4B5B4B5B4B0180EC0FF04B90C8FC4B5A4B5A4B48ED1FE0EDFFE0 -4A5B4A5B4A90C9FC4A48163F4A5ADA3FF017C05D4A48167F4A5A4990CA12FFD903FC1607 -49BAFC5B4919805B5B90BBFC5A5A5A5A481A005A5ABCFCA462A44C7176F061>50 -D<92383FFFF80207B612E0027F15FC49B87E010717E0011F83499026F0007F13FC4948C7 -000F7F90B502036D7E486E6D806F6D80727F486E6E7F8486727FA28684A26C5C72806C5C -6D90C8FC6D5AEB0FF8EB03E090CAFCA70507B6FC041FB7FC0303B8FC157F0203B9FC021F -ECFE0391B612800103ECF800010F14C04991C7FC017F13FC90B512F04814C0485C4891C8 -FC485B5A485B5C5A5CA2B5FC5CA360A36E5DA26C5F6E5D187E6C6D846E4A48806C6D4A48 -14FC6C6ED90FF0ECFFFC6C02E090263FE07F14FE00019139FC03FFC06C91B6487E013F4B -487E010F4B1307010303F01301D9003F0280D9003F13FC020101F8CBFC57507ACE5E>97 -D<903801FFFCB6FCA8C67E131F7FB3ADF0FFFC050FEBFFE0057F14FE0403B77E040F16E0 
-043F16F84CD9007F13FE9226FDFFF001077F92B500C001018094C86C13E004FC6F7F4C6F -7F04E06F7F4C6F7F5E747F93C915804B7014C0A27414E0A21DF087A21DF8A31DFC87A41D -FEAF1DFCA4631DF8A31DF098B5FC1DE0A25014C0A26F1980501400705D705F704B5B505B -704B5B04FC4B5BDBE7FE92B55A9226C3FF8001035C038101E0011F49C7FC9226807FFC90 -B55A4B6CB712F04A010F16C04A010393C8FC4A010015F84A023F14C090C9000301F0C9FC -5F797AF76C>I<97380FFFE00607B6FCA8F00003190086B3AD93383FFF800307B512F803 -3F14FF4AB712C0020716F0021F16FC027F9039FE007FFE91B500F0EB0FFF010302800101 -90B5FC4949C87E49498149498149498149498190B548814884484A8192CAFC5AA2485BA2 -5A5C5AA35A5CA4B5FCAF7EA4807EA37EA2807EA26C7F616C6E5D6C606C80616D6D5D6D6D -5D6D6D92B67E6D6D4A15FC010301FF0207EDFFFE6D02C0EB3FFE6D6C9039FC01FFF86E90 -B65A020F16C002031600DA007F14FC030F14E09226007FFEC749C7FC5F797AF76C>100 -D105 D<903801FFFCB6FCA8C67E131F7FB3B3B3B3B3ABB812C0A82A7879F7 -35>108 D<902601FFF891380FFFE0B692B512FE05036E7E050F15E0053F15F84D819327 -01FFF01F7F4CD900077FDC07FC6D80C66CDA0FF06D80011FDA1FC07F6D4A48824CC8FC04 -7E6F7F5EEDF9F85E03FB707F5E15FF5EA25EA293C9FCA45DB3B3A6B8D8E003B81280A861 -4E79CD6C>110 D<902601FFFCEC7FFEB6020FB512F0057F14FE4CB712C0040716F0041F -82047F16FE93B5C66C7F92B500F0010F14C0C66C0380010380011F4AC76C806D4A6E8004 -F06F7F4C6F7F4C6F7F4C8193C915804B7014C0861DE0A27414F0A27414F8A47513FCA575 -13FEAF5113FCA598B512F8A31DF0621DE0621DC0621D806F5E701800704B5B505B704B5B -7092B55A04FC4A5C704A5C706C010F5C05E0013F49C7FC9227FE7FFC01B55A70B712F004 -0F16C0040393C8FC040015F8053F14C0050301F0C9FC94CCFCB3A6B812E0A85F6F7ACD6C ->112 D<0007BA12FE1AFFA503E0C76C13FE4AC8B512FC4801F04A14F84A5C4A17F091C8 -4814E04D14C0495D4918804D1400494B5B94B5FC61494A5C4C5C5E61001F4B5C494A5C5E -96C7FC4C5B93B55A5DC85D4B5C4B5C5D604B5C4B91C8FC5D5F92B55A4A5C5C4D14FF4A5C -4A5C5C5F4A91C75A4A4915FE91B5FC5E495C495C4917035E495C495C49170793C8FC4949 -ED0FFC90B55A48181F5D484A153F484A157F4818FF4B1403484A140F4891C8123F480407 -B5FC92B8FCBB12F8A57E484D7BCC56>122 D E -%EndDVIPSBitmapFont -end -%%EndProlog -%%BeginSetup 
-%%Feature: *Resolution 600dpi -TeXDict begin -%%PaperSize: A4 - -%%EndSetup -%%Page: 1 1 -1 0 bop 150 1318 a Fm(bzip2)64 b(and)g(libbzip2)p 150 -1418 3600 34 v 2010 1515 a Fl(a)31 b(program)f(and)g(library)e(for)i -(data)h(compression)2198 1623 y(cop)m(yrigh)m(t)f(\(C\))h(1996-2002)j -(Julian)28 b(Sew)m(ard)2394 1731 y(v)m(ersion)i(1.0.2)i(of)f(30)g -(Decem)m(b)s(er)g(2001)150 5091 y Fk(Julian)46 b(Sew)l(ard)p -150 5141 3600 17 v eop -%%Page: 1 2 -1 1 bop 3705 -116 a Fl(1)150 299 y(The)40 b(follo)m(wing)f(text)i(is)f -(the)h(License)f(for)g(this)f(soft)m(w)m(are.)73 b(Y)-8 -b(ou)41 b(should)d(\014nd)h(it)h(iden)m(tical)f(to)i(that)150 -408 y(con)m(tained)31 b(in)e(the)h(\014le)g(LICENSE)f(in)g(the)h -(source)h(distribution.)150 565 y Fj(||||||)f(ST)-8 b(AR)g(T)30 -b(OF)h(THE)f(LICENSE)f(||||||)150 722 y(This)24 b(program,)i -Fi(bzip2)p Fj(,)f(and)f(asso)s(ciated)h(library)g Fi(libbzip2)p -Fj(,)e(are)i(Cop)m(yrigh)m(t)h(\(C\))f(1996-2002)j(Julian)150 -832 y(R)i(Sew)m(ard.)41 b(All)30 b(righ)m(ts)h(reserv)m(ed.)150 -989 y(Redistribution)46 b(and)f(use)g(in)g(source)h(and)f(binary)f -(forms,)50 b(with)45 b(or)h(without)g(mo)s(di\014cation,)j(are)150 -1098 y(p)s(ermitted)31 b(pro)m(vided)f(that)h(the)g(follo)m(wing)g -(conditions)g(are)g(met:)225 1255 y Fh(\017)60 b Fj(Redistributions)44 -b(of)g(source)g(co)s(de)f(m)m(ust)h(retain)g(the)g(ab)s(o)m(v)m(e)h -(cop)m(yrigh)m(t)g(notice,)k(this)43 b(list)i(of)330 -1365 y(conditions)31 b(and)e(the)i(follo)m(wing)h(disclaimer.)225 -1499 y Fh(\017)60 b Fj(The)32 b(origin)g(of)g(this)g(soft)m(w)m(are)h -(m)m(ust)f(not)g(b)s(e)f(misrepresen)m(ted;)j(y)m(ou)e(m)m(ust)g(not)g -(claim)g(that)h(y)m(ou)330 1609 y(wrote)d(the)f(original)h(soft)m(w)m -(are.)42 b(If)29 b(y)m(ou)g(use)g(this)g(soft)m(w)m(are)i(in)d(a)h(pro) -s(duct,)g(an)g(ac)m(kno)m(wledgmen)m(t)330 1718 y(in)h(the)h(pro)s -(duct)e(do)s(cumen)m(tation)j(w)m(ould)e(b)s(e)f(appreciated)j(but)d -(is)i(not)g(required.)225 1853 y Fh(\017)60 b Fj(Altered)36 
-b(source)g(v)m(ersions)g(m)m(ust)f(b)s(e)f(plainly)i(mark)m(ed)f(as)g -(suc)m(h,)i(and)d(m)m(ust)h(not)h(b)s(e)f(misrepre-)330 -1962 y(sen)m(ted)c(as)f(b)s(eing)g(the)h(original)h(soft)m(w)m(are.)225 -2097 y Fh(\017)60 b Fj(The)25 b(name)g(of)g(the)g(author)g(ma)m(y)h -(not)f(b)s(e)f(used)h(to)g(endorse)g(or)h(promote)g(pro)s(ducts)e -(deriv)m(ed)h(from)330 2206 y(this)30 b(soft)m(w)m(are)i(without)f(sp)s -(eci\014c)f(prior)g(written)h(p)s(ermission.)150 2388 -y(THIS)36 b(SOFTW)-10 b(ARE)35 b(IS)h(PR)m(O)m(VIDED)g(BY)f(THE)h(A)m -(UTHOR)h(\\AS)f(IS")g(AND)f(ANY)h(EXPRESS)150 2498 y(OR)29 -b(IMPLIED)g(W)-10 b(ARRANTIES,)28 b(INCLUDING,)h(BUT)g(NOT)g(LIMITED)g -(TO,)h(THE)e(IMPLIED)150 2607 y(W)-10 b(ARRANTIES)60 -b(OF)h(MER)m(CHANT)-8 b(ABILITY)61 b(AND)g(FITNESS)f(F)m(OR)i(A)f(P)-8 -b(AR)g(TICULAR)150 2717 y(PURPOSE)40 b(ARE)g(DISCLAIMED.)f(IN)i(NO)g -(EVENT)f(SHALL)f(THE)i(A)m(UTHOR)g(BE)f(LIABLE)150 2827 -y(F)m(OR)d(ANY)f(DIRECT,)g(INDIRECT,)g(INCIDENT)-8 b(AL,)37 -b(SPECIAL,)d(EXEMPLAR)-8 b(Y,)36 b(OR)h(CON-)150 2936 -y(SEQUENTIAL)30 b(D)m(AMA)m(GES)h(\(INCLUDING,)h(BUT)e(NOT)i(LIMITED)f -(TO,)h(PR)m(OCUREMENT)150 3046 y(OF)55 b(SUBSTITUTE)e(GOODS)i(OR)f(SER) --10 b(VICES;)53 b(LOSS)g(OF)i(USE,)f(D)m(A)-8 b(T)g(A,)55 -b(OR)g(PR)m(OFITS;)150 3155 y(OR)e(BUSINESS)d(INTERR)m(UPTION\))k(HO)m -(WEVER)e(CA)m(USED)f(AND)h(ON)h(ANY)f(THEOR)-8 b(Y)150 -3265 y(OF)71 b(LIABILITY,)f(WHETHER)g(IN)h(CONTRA)m(CT,)g(STRICT)g -(LIABILITY,)e(OR)i(TOR)-8 b(T)150 3374 y(\(INCLUDING)45 -b(NEGLIGENCE)g(OR)g(OTHER)-10 b(WISE\))45 b(ARISING)g(IN)g(ANY)g(W)-10 -b(A)i(Y)44 b(OUT)i(OF)150 3484 y(THE)i(USE)f(OF)h(THIS)g(SOFTW)-10 -b(ARE,)47 b(EVEN)g(IF)h(AD)m(VISED)e(OF)i(THE)g(POSSIBILITY)f(OF)150 -3594 y(SUCH)30 b(D)m(AMA)m(GE.)150 3750 y(Julian)g(Sew)m(ard,)g(Cam)m -(bridge,)h(UK.)150 3907 y Fi(jseward@acm.org)150 4064 -y(bzip2)p Fj(/)p Fi(libbzip2)c Fj(v)m(ersion)k(1.0.2)i(of)d(30)h(Decem) -m(b)s(er)g(2001.)150 4221 y(||||||)f(END)g(OF)g(THE)g(LICENSE)f(||||||) -150 4378 y(W)-8 
b(eb)31 b(sites:)150 4535 y Fi -(http://sources.redhat.co)o(m/bz)o(ip2)150 4691 y -(http://www.cacheprof.org)150 4848 y Fj(P)-8 b(A)g(TENTS:)40 -b(T)-8 b(o)40 b(the)f(b)s(est)g(of)g(m)m(y)h(kno)m(wledge,)i -Fi(bzip2)c Fj(do)s(es)h(not)g(use)g(an)m(y)h(paten)m(ted)g(algorithms.) -150 4958 y(Ho)m(w)m(ev)m(er,)31 b(I)c(do)h(not)g(ha)m(v)m(e)g(the)g -(resources)h(a)m(v)-5 b(ailable)29 b(to)f(carry)g(out)g(a)g(full)f -(paten)m(t)i(searc)m(h.)40 b(Therefore)150 5067 y(I)30 -b(cannot)h(giv)m(e)h(an)m(y)f(guaran)m(tee)g(of)g(the)g(ab)s(o)m(v)m(e) -g(statemen)m(t.)p eop -%%Page: 2 3 -2 2 bop 150 -116 a Fl(Chapter)30 b(1:)41 b(In)m(tro)s(duction)2591 -b(2)150 299 y Fg(1)80 b(In)l(tro)t(duction)150 555 y -Fi(bzip2)40 b Fj(compresses)h(\014les)g(using)g(the)g(Burro)m -(ws-Wheeler)h(blo)s(c)m(k-sorting)h(text)f(compression)f(algo-)150 -665 y(rithm,)54 b(and)49 b(Hu\013man)f(co)s(ding.)98 -b(Compression)49 b(is)h(generally)h(considerably)e(b)s(etter)h(than)f -(that)150 775 y(ac)m(hiev)m(ed)43 b(b)m(y)f(more)g(con)m(v)m(en)m -(tional)i(LZ77/LZ78-based)e(compressors,)j(and)40 b(approac)m(hes)i -(the)g(p)s(er-)150 884 y(formance)31 b(of)f(the)h(PPM)g(family)f(of)h -(statistical)i(compressors.)150 1041 y Fi(bzip2)g Fj(is)h(built)h(on)f -(top)h(of)f Fi(libbzip2)p Fj(,)f(a)i(\015exible)f(library)h(for)f -(handling)g(compressed)g(data)h(in)f(the)150 1151 y Fi(bzip2)29 -b Fj(format.)41 b(This)30 b(man)m(ual)g(describ)s(es)g(b)s(oth)f(ho)m -(w)i(to)g(use)f(the)g(program)h(and)e(ho)m(w)h(to)h(w)m(ork)g(with)150 -1260 y(the)26 b(library)f(in)m(terface.)41 b(Most)26 -b(of)f(the)h(man)m(ual)f(is)g(dev)m(oted)i(to)f(this)f(library)-8 -b(,)27 b(not)f(the)g(program,)g(whic)m(h)150 1370 y(is)k(go)s(o)s(d)h -(news)e(if)i(y)m(our)f(in)m(terest)i(is)f(only)f(in)g(the)h(program.) 
-150 1527 y(Chapter)45 b(2)h(describ)s(es)e(ho)m(w)i(to)g(use)f -Fi(bzip2)p Fj(;)51 b(this)46 b(is)f(the)h(only)f(part)g(y)m(ou)h(need)f -(to)h(read)f(if)h(y)m(ou)150 1636 y(just)39 b(w)m(an)m(t)g(to)h(kno)m -(w)f(ho)m(w)h(to)f(op)s(erate)h(the)g(program.)67 b(Chapter)38 -b(3)i(describ)s(es)e(the)i(programming)150 1746 y(in)m(terfaces)27 -b(in)f(detail,)i(and)d(Chapter)g(4)h(records)g(some)g(miscellaneous)h -(notes)f(whic)m(h)g(I)g(though)m(t)g(ough)m(t)150 1855 -y(to)31 b(b)s(e)f(recorded)h(somewhere.)p eop -%%Page: 3 4 -3 3 bop 150 -116 a Fl(Chapter)30 b(2:)41 b(Ho)m(w)31 -b(to)g(use)f Fi(bzip2)2375 b Fl(3)150 299 y Fg(2)80 b(Ho)l(w)53 -b(to)g(use)g Ff(bzip2)150 566 y Fj(This)30 b(c)m(hapter)h(con)m(tains)h -(a)e(cop)m(y)h(of)g(the)g Fi(bzip2)d Fj(man)i(page,)h(and)f(nothing)g -(else.)390 818 y Fe(NAME)570 1004 y Fi(bzip2)p Fj(,)f -Fi(bunzip2)g Fj(-)h(a)h(blo)s(c)m(k-sorting)h(\014le)e(compressor,)h -(v1.0.2)570 1136 y Fi(bzcat)e Fj(-)i(decompresses)f(\014les)h(to)g -(stdout)570 1267 y Fi(bzip2recover)c Fj(-)k(reco)m(v)m(ers)h(data)f -(from)f(damaged)g(bzip2)h(\014les)390 1519 y Fe(SYNOPSIS)570 -1706 y Fi(bzip2)e Fj([)i(-cdfkqstvzVL123456789)j(])d([)g(\014lenames)f -(...)41 b(])570 1837 y Fi(bunzip2)28 b Fj([)j(-fkvsVL)f(])g([)h -(\014lenames)f(...)41 b(])570 1968 y Fi(bzcat)29 b Fj([)i(-s)f(])h([)g -(\014lenames)f(...)41 b(])570 2100 y Fi(bzip2recover)27 -b Fj(\014lename)390 2352 y Fe(DESCRIPTION)390 2538 y -Fi(bzip2)e Fj(compresses)i(\014les)f(using)g(the)h(Burro)m(ws-Wheeler)g -(blo)s(c)m(k)f(sorting)i(text)f(compres-)390 2642 y(sion)39 -b(algorithm,)k(and)38 b(Hu\013man)h(co)s(ding.)67 b(Compression)39 -b(is)g(generally)i(considerably)390 2746 y(b)s(etter)23 -b(than)g(that)g(ac)m(hiev)m(ed)h(b)m(y)f(more)g(con)m(v)m(en)m(tional)i -(LZ77/LZ78-based)e(compressors,)390 2850 y(and)k(approac)m(hes)i(the)f -(p)s(erformance)g(of)h(the)f(PPM)h(family)f(of)h(statistical)h -(compressors.)390 3001 y(The)c(command-line)g(options)g(are)g(delib)s 
-(erately)h(v)m(ery)f(similar)h(to)f(those)h(of)f(GNU)g -Fi(gzip)p Fj(,)390 3104 y(but)k(they)g(are)h(not)g(iden)m(tical.)390 -3255 y Fi(bzip2)e Fj(exp)s(ects)j(a)f(list)h(of)f(\014le)g(names)f(to)i -(accompan)m(y)g(the)f(command-line)g(\015ags.)43 b(Eac)m(h)390 -3359 y(\014le)d(is)h(replaced)g(b)m(y)f(a)g(compressed)h(v)m(ersion)g -(of)f(itself,)k(with)d(the)f(name)g Fi(original_)390 -3463 y(name.bz2)p Fj(.)45 b(Eac)m(h)33 b(compressed)g(\014le)g(has)f -(the)h(same)g(mo)s(di\014cation)g(date,)h(p)s(ermissions,)390 -3567 y(and,)47 b(when)c(p)s(ossible,)k(o)m(wnership)c(as)h(the)g -(corresp)s(onding)g(original,)49 b(so)44 b(that)g(these)390 -3671 y(prop)s(erties)37 b(can)g(b)s(e)g(correctly)i(restored)f(at)g -(decompression)f(time.)62 b(File)39 b(name)d(han-)390 -3774 y(dling)e(is)g(naiv)m(e)h(in)f(the)g(sense)g(that)h(there)f(is)g -(no)g(mec)m(hanism)g(for)h(preserving)f(original)390 -3878 y(\014le)41 b(names,)i(p)s(ermissions,)g(o)m(wnerships)d(or)h -(dates)g(in)g(\014lesystems)g(whic)m(h)f(lac)m(k)i(these)390 -3982 y(concepts,)32 b(or)e(ha)m(v)m(e)i(serious)e(\014le)h(name)f -(length)h(restrictions,)h(suc)m(h)e(as)g(MS-DOS.)390 -4133 y Fi(bzip2)24 b Fj(and)i Fi(bunzip2)d Fj(will)k(b)m(y)f(default)f -(not)i(o)m(v)m(erwrite)h(existing)f(\014les.)39 b(If)26 -b(y)m(ou)g(w)m(an)m(t)h(this)390 4237 y(to)k(happ)s(en,)e(sp)s(ecify)h -(the)h Fi(-f)e Fj(\015ag.)390 4388 y(If)k(no)f(\014le)h(names)f(are)i -(sp)s(eci\014ed,)e Fi(bzip2)g Fj(compresses)h(from)f(standard)g(input)g -(to)i(stan-)390 4491 y(dard)d(output.)44 b(In)32 b(this)f(case,)i -Fi(bzip2)e Fj(will)h(decline)g(to)h(write)f(compressed)g(output)f(to)i -(a)390 4595 y(terminal,)e(as)g(this)f(w)m(ould)g(b)s(e)g(en)m(tirely)i -(incomprehensible)e(and)g(therefore)h(p)s(oin)m(tless.)390 -4746 y Fi(bunzip2)j Fj(\(or)j Fi(bzip2)29 b(-d)p Fj(\))36 -b(decompresses)g(all)h(sp)s(eci\014ed)e(\014les.)58 b(Files)37 -b(whic)m(h)f(w)m(ere)h(not)390 4850 y(created)f(b)m(y)e -Fi(bzip2)g 
Fj(will)g(b)s(e)g(detected)j(and)c(ignored,)k(and)c(a)i(w)m -(arning)g(issued.)52 b Fi(bzip2)390 4954 y Fj(attempts)29 -b(to)f(guess)g(the)g(\014lename)f(for)h(the)g(decompressed)f(\014le)h -(from)f(that)h(of)g(the)g(com-)390 5058 y(pressed)i(\014le)g(as)h -(follo)m(ws:)570 5209 y Fi(filename.bz2)57 b Fj(b)s(ecomes)31 -b Fi(filename)570 5340 y(filename.bz)58 b Fj(b)s(ecomes)30 -b Fi(filename)p eop -%%Page: 4 5 -4 4 bop 150 -116 a Fl(Chapter)30 b(2:)41 b(Ho)m(w)31 -b(to)g(use)f Fi(bzip2)2375 b Fl(4)570 299 y Fi(filename.tbz2)27 -b Fj(b)s(ecomes)j Fi(filename.tar)570 470 y(filename.tbz)57 -b Fj(b)s(ecomes)31 b Fi(filename.tar)570 641 y(anyothername)57 -b Fj(b)s(ecomes)31 b Fi(anyothername.out)390 859 y Fj(If)i(the)g -(\014le)f(do)s(es)h(not)g(end)f(in)g(one)i(of)e(the)h(recognised)h -(endings,)f Fi(.bz2)p Fj(,)g Fi(.bz)p Fj(,)f Fi(.tbz2)g -Fj(or)390 963 y Fi(.tbz)p Fj(,)h Fi(bzip2)g Fj(complains)g(that)h(it)h -(cannot)f(guess)f(the)h(name)g(of)f(the)h(original)h(\014le,)g(and)390 -1067 y(uses)30 b(the)h(original)g(name)f(with)h Fi(.out)e -Fj(app)s(ended.)390 1218 y(As)g(with)h(compression,)h(supplying)e(no)h -(\014lenames)f(causes)i(decompression)f(from)g(stan-)390 -1321 y(dard)f(input)h(to)h(standard)f(output.)390 1472 -y Fi(bunzip2)h Fj(will)j(correctly)i(decompress)d(a)h(\014le)f(whic)m -(h)g(is)h(the)g(concatenation)h(of)f(t)m(w)m(o)h(or)390 -1576 y(more)i(compressed)g(\014les.)61 b(The)37 b(result)g(is)g(the)g -(concatenation)i(of)e(the)g(corresp)s(onding)390 1680 -y(uncompressed)c(\014les.)52 b(In)m(tegrit)m(y)37 b(testing)e(\()p -Fi(-t)p Fj(\))f(of)h(concatenated)h(compressed)e(\014les)g(is)390 -1784 y(also)d(supp)s(orted.)390 1935 y(Y)-8 b(ou)38 b(can)g(also)g -(compress)g(or)g(decompress)g(\014les)f(to)i(the)f(standard)f(output)g -(b)m(y)h(giving)390 2039 y(the)44 b Fi(-c)e Fj(\015ag.)79 -b(Multiple)44 b(\014les)g(ma)m(y)f(b)s(e)g(compressed)g(and)f -(decompressed)h(lik)m(e)i(this.)390 2142 y(The)31 b(resulting)h 
-(outputs)e(are)i(fed)f(sequen)m(tially)h(to)g(stdout.)43 -b(Compression)31 b(of)g(m)m(ultiple)390 2246 y(\014les)43 -b(in)g(this)g(manner)g(generates)i(a)e(stream)h(con)m(taining)g(m)m -(ultiple)h(compressed)e(\014le)390 2350 y(represen)m(tations.)48 -b(Suc)m(h)32 b(a)g(stream)h(can)f(b)s(e)g(decompressed)g(correctly)j -(only)d(b)m(y)g Fi(bzip2)390 2454 y Fj(v)m(ersion)h(0.9.0)h(or)f -(later.)48 b(Earlier)33 b(v)m(ersions)g(of)g Fi(bzip2)e -Fj(will)i(stop)g(after)g(decompressing)390 2558 y(the)e(\014rst)f -(\014le)g(in)g(the)h(stream.)390 2709 y Fi(bzcat)e Fj(\(or)i -Fi(bzip2)e(-dc)p Fj(\))h(decompresses)g(all)h(sp)s(eci\014ed)f(\014les) -g(to)i(the)e(standard)g(output.)390 2860 y Fi(bzip2)c -Fj(will)j(read)e(argumen)m(ts)i(from)e(the)h(en)m(vironmen)m(t)h(v)-5 -b(ariables)28 b Fi(BZIP2)f Fj(and)g Fi(BZIP)p Fj(,)g(in)390 -2963 y(that)21 b(order,)i(and)d(will)h(pro)s(cess)f(them)h(b)s(efore)g -(an)m(y)f(argumen)m(ts)h(read)g(from)f(the)h(command)390 -3067 y(line.)41 b(This)30 b(giv)m(es)i(a)e(con)m(v)m(enien)m(t)j(w)m(a) -m(y)e(to)g(supply)e(default)i(argumen)m(ts.)390 3218 -y(Compression)e(is)h(alw)m(a)m(ys)h(p)s(erformed,)e(ev)m(en)h(if)g(the) -g(compressed)f(\014le)h(is)g(sligh)m(tly)h(larger)390 -3322 y(than)24 b(the)h(original.)40 b(Files)25 b(of)f(less)h(than)f(ab) -s(out)g(one)h(h)m(undred)d(b)m(ytes)j(tend)g(to)g(get)h(larger,)390 -3426 y(since)33 b(the)g(compression)f(mec)m(hanism)h(has)f(a)h(constan) -m(t)g(o)m(v)m(erhead)h(in)e(the)h(region)g(of)g(50)390 -3529 y(b)m(ytes.)50 b(Random)33 b(data)h(\(including)f(the)h(output)f -(of)g(most)h(\014le)g(compressors\))g(is)f(co)s(ded)390 -3633 y(at)e(ab)s(out)f(8.05)i(bits)e(p)s(er)g(b)m(yte,)h(giving)h(an)e -(expansion)g(of)g(around)f(0.5\045.)390 3784 y(As)f(a)h(self-c)m(hec)m -(k)i(for)e(y)m(our)g(protection,)i Fi(bzip2)d Fj(uses)g(32-bit)i(CR)m -(Cs)f(to)g(mak)m(e)h(sure)e(that)390 3888 y(the)43 b(decompressed)g(v)m -(ersion)h(of)f(a)g(\014le)f(is)h(iden)m(tical)i(to)f(the)f(original.)79 -b(This)43 
b(guards)390 3992 y(against)h(corruption)f(of)h(the)f -(compressed)g(data,)k(and)42 b(against)i(undetected)g(bugs)e(in)390 -4096 y Fi(bzip2)33 b Fj(\(hop)s(efully)i(v)m(ery)g(unlik)m(ely\).)55 -b(The)35 b(c)m(hances)h(of)f(data)g(corruption)g(going)h(unde-)390 -4199 y(tected)e(is)f(microscopic,)h(ab)s(out)e(one)h(c)m(hance)g(in)g -(four)f(billion)g(for)h(eac)m(h)g(\014le)g(pro)s(cessed.)390 -4303 y(Be)j(a)m(w)m(are,)k(though,)d(that)g(the)g(c)m(hec)m(k)h(o)s -(ccurs)e(up)s(on)f(decompression,)j(so)f(it)f(can)h(only)390 -4407 y(tell)28 b(y)m(ou)e(that)h(something)g(is)g(wrong.)39 -b(It)27 b(can't)g(help)f(y)m(ou)h(reco)m(v)m(er)i(the)d(original)i -(uncom-)390 4511 y(pressed)f(data.)40 b(Y)-8 b(ou)27 -b(can)g(use)h Fi(bzip2recover)23 b Fj(to)29 b(try)e(to)i(reco)m(v)m(er) -g(data)f(from)f(damaged)390 4614 y(\014les.)390 4766 -y(Return)42 b(v)-5 b(alues:)65 b(0)42 b(for)h(a)f(normal)g(exit,)47 -b(1)c(for)f(en)m(vironmen)m(tal)i(problems)e(\(\014le)h(not)390 -4869 y(found,)29 b(in)m(v)-5 b(alid)30 b(\015ags,)g(I/O)h(errors,)g -(&c\),)f(2)h(to)f(indicate)h(a)f(corrupt)g(compressed)g(\014le,)h(3)390 -4973 y(for)f(an)g(in)m(ternal)i(consistency)f(error)g(\(eg,)h(bug\))e -(whic)m(h)g(caused)g Fi(bzip2)f Fj(to)i(panic.)390 5304 -y Fe(OPTIONS)p eop -%%Page: 5 6 -5 5 bop 150 -116 a Fl(Chapter)30 b(2:)41 b(Ho)m(w)31 -b(to)g(use)f Fi(bzip2)2375 b Fl(5)390 299 y Fi(-c)30 -b(--stdout)870 403 y Fj(Compress)g(or)g(decompress)h(to)g(standard)e -(output.)390 550 y Fi(-d)h(--decompress)870 653 y Fj(F)-8 -b(orce)42 b(decompression.)72 b Fi(bzip2)p Fj(,)43 b -Fi(bunzip2)38 b Fj(and)i Fi(bzcat)g Fj(are)h(really)h(the)870 -757 y(same)26 b(program,)i(and)d(the)h(decision)h(ab)s(out)f(what)g -(actions)h(to)g(tak)m(e)h(is)e(done)870 861 y(on)j(the)h(basis)f(of)g -(whic)m(h)h(name)f(is)g(used.)40 b(This)29 b(\015ag)g(o)m(v)m(errides)i -(that)f(mec)m(h-)870 965 y(anism,)g(and)g(forces)h(bzip2)f(to)h -(decompress.)390 1112 y Fi(-z)f(--compress)870 1215 y -Fj(The)37 b(complemen)m(t)h(to)g 
Fi(-d)p Fj(:)54 b(forces)37 -b(compression,)j(regardless)d(of)h(the)f(in-)870 1319 -y(v)m(ok)-5 b(ation)32 b(name.)390 1466 y Fi(-t)e(--test)66 -b Fj(Chec)m(k)26 b(in)m(tegrit)m(y)i(of)e(the)g(sp)s(eci\014ed)f -(\014le\(s\),)i(but)e(don't)h(decompress)g(them.)870 -1570 y(This)39 b(really)h(p)s(erforms)f(a)g(trial)h(decompression)g -(and)e(thro)m(ws)i(a)m(w)m(a)m(y)h(the)870 1674 y(result.)390 -1821 y Fi(-f)30 b(--force)870 1924 y Fj(F)-8 b(orce)27 -b(o)m(v)m(erwrite)h(of)e(output)g(\014les.)39 b(Normally)-8 -b(,)28 b Fi(bzip2)c Fj(will)j(not)f(o)m(v)m(erwrite)870 -2028 y(existing)i(output)e(\014les.)40 b(Also)27 b(forces)g -Fi(bzip2)f Fj(to)h(break)g(hard)f(links)g(to)i(\014les,)870 -2132 y(whic)m(h)i(it)h(otherwise)g(w)m(ouldn't)g(do.)870 -2257 y Fi(bzip2)h Fj(normally)h(declines)h(to)f(decompress)h(\014les)f -(whic)m(h)f(don't)i(ha)m(v)m(e)g(the)870 2361 y(correct)39 -b(magic)g(header)e(b)m(ytes.)63 b(If)38 b(forced)g(\()p -Fi(-f)p Fj(\),)h(ho)m(w)m(ev)m(er,)i(it)e(will)f(pass)870 -2465 y(suc)m(h)30 b(\014les)g(through)g(unmo)s(di\014ed.)39 -b(This)30 b(is)h(ho)m(w)f(GNU)h Fi(gzip)e Fj(b)s(eha)m(v)m(es.)390 -2612 y Fi(-k)h(--keep)66 b Fj(Keep)34 b(\(don't)g(delete\))h(input)d -(\014les)i(during)e(compression)i(or)f(decompres-)870 -2716 y(sion.)390 2863 y Fi(-s)d(--small)870 2966 y Fj(Reduce)35 -b(memory)g(usage,)i(for)e(compression,)i(decompression)e(and)f(test-) -870 3070 y(ing.)40 b(Files)29 b(are)f(decompressed)g(and)f(tested)i -(using)f(a)g(mo)s(di\014ed)f(algorithm)870 3174 y(whic)m(h)32 -b(only)g(requires)h(2.5)g(b)m(ytes)g(p)s(er)f(blo)s(c)m(k)g(b)m(yte.)48 -b(This)32 b(means)f(an)m(y)i(\014le)870 3278 y(can)d(b)s(e)f -(decompressed)g(in)h(2300k)h(of)f(memory)-8 b(,)31 b(alb)s(eit)f(at)g -(ab)s(out)f(half)h(the)870 3381 y(normal)g(sp)s(eed.)870 -3507 y(During)35 b(compression,)j Fi(-s)d Fj(selects)j(a)e(blo)s(c)m(k) -h(size)f(of)h(200k,)h(whic)m(h)e(limits)870 3611 y(memory)43 -b(use)f(to)h(around)e(the)i(same)f(\014gure,)k(at)d(the)f(exp)s(ense)g 
-(of)h(y)m(our)870 3714 y(compression)32 b(ratio.)47 b(In)31 -b(short,)i(if)e(y)m(our)i(mac)m(hine)f(is)g(lo)m(w)g(on)g(memory)g(\(8) -870 3818 y(megab)m(ytes)39 b(or)e(less\),)j(use)d(-s)g(for)g(ev)m -(erything.)62 b(See)38 b(MEMOR)-8 b(Y)37 b(MAN-)870 3922 -y(A)m(GEMENT)31 b(b)s(elo)m(w.)390 4069 y Fi(-q)f(--quiet)870 -4173 y Fj(Suppress)h(non-essen)m(tial)j(w)m(arning)f(messages.)49 -b(Messages)34 b(p)s(ertaining)f(to)870 4276 y(I/O)e(errors)g(and)e -(other)i(critical)i(ev)m(en)m(ts)f(will)e(not)h(b)s(e)f(suppressed.)390 -4423 y Fi(-v)g(--verbose)870 4527 y Fj(V)-8 b(erb)s(ose)25 -b(mo)s(de)f({)h(sho)m(w)f(the)h(compression)g(ratio)g(for)g(eac)m(h)g -(\014le)g(pro)s(cessed.)870 4631 y(F)-8 b(urther)27 b -Fi(-v)p Fj('s)g(increase)h(the)g(v)m(erb)s(osit)m(y)g(lev)m(el,)i(sp)s -(ewing)d(out)g(lots)h(of)g(infor-)870 4735 y(mation)j(whic)m(h)f(is)g -(primarily)h(of)f(in)m(terest)i(for)f(diagnostic)g(purp)s(oses.)390 -4882 y Fi(-L)f(--license)e(-V)h(--version)870 4985 y -Fj(Displa)m(y)i(the)f(soft)m(w)m(are)i(v)m(ersion,)g(license)f(terms)g -(and)e(conditions.)390 5132 y Fi(-1)h(\(or)f(--fast\))g(to)g(-9)h(\(or) -g(--best\))870 5236 y Fj(Set)h(the)g(blo)s(c)m(k)g(size)g(to)h(100)g -(k,)e(200)i(k)f(..)41 b(900)32 b(k)f(when)e(compressing.)42 -b(Has)870 5340 y(no)37 b(e\013ect)i(when)d(decompressing.)62 -b(See)37 b(MEMOR)-8 b(Y)38 b(MANA)m(GEMENT)p eop -%%Page: 6 7 -6 6 bop 150 -116 a Fl(Chapter)30 b(2:)41 b(Ho)m(w)31 -b(to)g(use)f Fi(bzip2)2375 b Fl(6)870 299 y Fj(b)s(elo)m(w.)83 -b(The)44 b Fi(--fast)f Fj(and)g Fi(--best)g Fj(aliases)i(are)g -(primarily)f(for)g(GNU)870 403 y Fi(gzip)27 b Fj(compatibilit)m(y)-8 -b(.)43 b(In)28 b(particular,)i Fi(--fast)d Fj(do)s(esn't)h(mak)m(e)i -(things)e(sig-)870 506 y(ni\014can)m(tly)d(faster.)40 -b(And)23 b Fi(--best)g Fj(merely)i(selects)h(the)f(default)g(b)s(eha)m -(viour.)390 674 y Fi(--)384 b Fj(T)-8 b(reats)38 b(all)f(subsequen)m(t) -f(argumen)m(ts)g(as)h(\014le)f(names,)i(ev)m(en)f(if)f(they)h(start)870 -778 y(with)31 
b(a)h(dash.)42 b(This)31 b(is)h(so)f(y)m(ou)h(can)f -(handle)g(\014les)g(with)h(names)f(b)s(eginning)870 881 -y(with)f(a)h(dash,)e(for)i(example:)41 b Fi(bzip2)29 -b(--)h(-myfilename)p Fj(.)390 1049 y Fi(--repetitive-fast)390 -1185 y(--repetitive-best)870 1288 y Fj(These)j(\015ags)g(are)g -(redundan)m(t)f(in)h(v)m(ersions)g(0.9.5)i(and)d(ab)s(o)m(v)m(e.)49 -b(They)33 b(pro-)870 1392 y(vided)g(some)h(coarse)h(con)m(trol)g(o)m(v) -m(er)g(the)f(b)s(eha)m(viour)f(of)h(the)g(sorting)g(algo-)870 -1496 y(rithm)j(in)g(earlier)h(v)m(ersions,)i(whic)m(h)d(w)m(as)g -(sometimes)h(useful.)61 b(0.9.5)39 b(and)870 1600 y(ab)s(o)m(v)m(e)31 -b(ha)m(v)m(e)f(an)g(impro)m(v)m(ed)g(algorithm)h(whic)m(h)e(renders)g -(these)h(\015ags)g(irrel-)870 1704 y(ev)-5 b(an)m(t.)390 -1964 y Fe(MEMOR)-10 b(Y)40 b(MANA)m(GEMENT)390 2151 y -Fi(bzip2)23 b Fj(compresses)i(large)h(\014les)e(in)g(blo)s(c)m(ks.)40 -b(The)24 b(blo)s(c)m(k)h(size)h(a\013ects)g(b)s(oth)e(the)g(compres-) -390 2255 y(sion)38 b(ratio)g(ac)m(hiev)m(ed,)k(and)36 -b(the)i(amoun)m(t)g(of)g(memory)g(needed)f(for)h(compression)g(and)390 -2358 y(decompression.)57 b(The)35 b(\015ags)h Fi(-1)f -Fj(through)g Fi(-9)g Fj(sp)s(ecify)g(the)h(blo)s(c)m(k)g(size)h(to)f(b) -s(e)f(100,000)390 2462 y(b)m(ytes)25 b(through)f(900,000)j(b)m(ytes)e -(\(the)h(default\))e(resp)s(ectiv)m(ely)-8 b(.)41 b(A)m(t)25 -b(decompression)g(time,)390 2566 y(the)k(blo)s(c)m(k)h(size)g(used)f -(for)g(compression)g(is)g(read)h(from)f(the)g(header)g(of)g(the)h -(compressed)390 2670 y(\014le,)j(and)f Fi(bunzip2)f Fj(then)h(allo)s -(cates)i(itself)g(just)e(enough)g(memory)h(to)g(decompress)g(the)390 -2773 y(\014le.)40 b(Since)27 b(blo)s(c)m(k)h(sizes)h(are)f(stored)g(in) -f(compressed)g(\014les,)h(it)h(follo)m(ws)f(that)g(the)g(\015ags)g -Fi(-1)390 2877 y Fj(to)j Fi(-9)f Fj(are)h(irrelev)-5 -b(an)m(t)32 b(to)f(and)f(so)g(ignored)h(during)e(decompression.)390 -3028 y(Compression)h(and)f(decompression)i(requiremen)m(ts,)g(in)f(b)m 
-(ytes,)i(can)e(b)s(e)g(estimated)i(as:)869 3179 y Fi(Compression:)140 -b(400k)46 b(+)i(\()f(8)h(x)f(block)f(size)h(\))869 3387 -y(Decompression:)d(100k)i(+)i(\()f(4)h(x)f(block)f(size)h(\),)g(or)1585 -3491 y(100k)f(+)i(\()f(2.5)g(x)g(block)g(size)f(\))390 -3642 y Fj(Larger)27 b(blo)s(c)m(k)f(sizes)h(giv)m(e)h(rapidly)e -(diminishing)f(marginal)i(returns.)39 b(Most)27 b(of)f(the)h(com-)390 -3745 y(pression)e(comes)i(from)f(the)g(\014rst)f(t)m(w)m(o)i(or)f -(three)h(h)m(undred)d(k)h(of)h(blo)s(c)m(k)g(size,)i(a)e(fact)h(w)m -(orth)390 3849 y(b)s(earing)32 b(in)g(mind)f(when)g(using)h -Fi(bzip2)f Fj(on)h(small)g(mac)m(hines.)47 b(It)32 b(is)h(also)f(imp)s -(ortan)m(t)h(to)390 3953 y(appreciate)f(that)g(the)f(decompression)g -(memory)g(requiremen)m(t)h(is)f(set)g(at)h(compression)390 -4057 y(time)f(b)m(y)f(the)h(c)m(hoice)h(of)f(blo)s(c)m(k)g(size.)390 -4208 y(F)-8 b(or)43 b(\014les)f(compressed)h(with)f(the)g(default)h -(900k)g(blo)s(c)m(k)g(size,)k Fi(bunzip2)40 b Fj(will)j(require)390 -4311 y(ab)s(out)28 b(3700)i(kb)m(ytes)f(to)h(decompress.)40 -b(T)-8 b(o)29 b(supp)s(ort)e(decompression)i(of)g(an)m(y)f(\014le)h(on) -f(a)h(4)390 4415 y(megab)m(yte)h(mac)m(hine,)g Fi(bunzip2)c -Fj(has)i(an)h(option)g(to)g(decompress)g(using)f(appro)m(ximately)390 -4519 y(half)33 b(this)g(amoun)m(t)h(of)f(memory)-8 b(,)35 -b(ab)s(out)d(2300)j(kb)m(ytes.)50 b(Decompression)34 -b(sp)s(eed)e(is)h(also)390 4623 y(halv)m(ed,)h(so)f(y)m(ou)g(should)e -(use)i(this)f(option)h(only)g(where)g(necessary)-8 b(.)48 -b(The)33 b(relev)-5 b(an)m(t)34 b(\015ag)390 4727 y(is)c -Fi(-s)p Fj(.)390 4878 y(In)i(general,)i(try)f(and)e(use)h(the)h -(largest)h(blo)s(c)m(k)f(size)g(memory)g(constrain)m(ts)g(allo)m(w,)i -(since)390 4981 y(that)43 b(maximises)g(the)g(compression)f(ac)m(hiev)m -(ed.)79 b(Compression)42 b(and)g(decompression)390 5085 -y(sp)s(eed)30 b(are)g(virtually)h(una\013ected)g(b)m(y)f(blo)s(c)m(k)h -(size.)390 5236 y(Another)24 b(signi\014can)m(t)i(p)s(oin)m(t)e 
-(applies)g(to)i(\014les)e(whic)m(h)g(\014t)h(in)f(a)g(single)h(blo)s(c) -m(k)g({)g(that)g(means)390 5340 y(most)g(\014les)f(y)m(ou'd)h(encoun)m -(ter)g(using)f(a)g(large)i(blo)s(c)m(k)f(size.)39 b(The)25 -b(amoun)m(t)f(of)h(real)g(memory)p eop -%%Page: 7 8 -7 7 bop 150 -116 a Fl(Chapter)30 b(2:)41 b(Ho)m(w)31 -b(to)g(use)f Fi(bzip2)2375 b Fl(7)390 299 y Fj(touc)m(hed)36 -b(is)g(prop)s(ortional)g(to)g(the)g(size)h(of)f(the)g(\014le,)h(since)f -(the)g(\014le)g(is)f(smaller)i(than)e(a)390 403 y(blo)s(c)m(k.)48 -b(F)-8 b(or)33 b(example,)h(compressing)f(a)g(\014le)f(20,000)j(b)m -(ytes)f(long)f(with)f(the)h(\015ag)g Fi(-9)f Fj(will)390 -506 y(cause)26 b(the)g(compressor)f(to)i(allo)s(cate)g(around)e(7600k)i -(of)f(memory)-8 b(,)27 b(but)e(only)g(touc)m(h)i(400k)390 -610 y Fi(+)h Fj(20000)i(*)f(8)g(=)f(560)h(kb)m(ytes)g(of)g(it.)40 -b(Similarly)-8 b(,)30 b(the)f(decompressor)f(will)h(allo)s(cate)h -(3700k)390 714 y(but)g(only)g(touc)m(h)h(100k)h Fi(+)e -Fj(20000)j(*)d(4)h(=)f(180)i(kb)m(ytes.)390 865 y(Here)39 -b(is)f(a)g(table)h(whic)m(h)f(summarises)f(the)i(maxim)m(um)e(memory)h -(usage)h(for)f(di\013eren)m(t)390 969 y(blo)s(c)m(k)c(sizes.)50 -b(Also)33 b(recorded)g(is)h(the)f(total)i(compressed)e(size)h(for)g(14) -g(\014les)f(of)g(the)h(Cal-)390 1073 y(gary)39 b(T)-8 -b(ext)39 b(Compression)f(Corpus)g(totalling)i(3,141,622)i(b)m(ytes.)65 -b(This)38 b(column)h(giv)m(es)390 1176 y(some)g(feel)f(for)h(ho)m(w)f -(compression)g(v)-5 b(aries)39 b(with)f(blo)s(c)m(k)g(size.)65 -b(These)38 b(\014gures)g(tend)g(to)390 1280 y(understate)28 -b(the)g(adv)-5 b(an)m(tage)29 b(of)f(larger)h(blo)s(c)m(k)g(sizes)f -(for)g(larger)h(\014les,)g(since)f(the)g(Corpus)390 1384 -y(is)i(dominated)h(b)m(y)f(smaller)h(\014les.)1107 1535 -y Fi(Compress)141 b(Decompress)g(Decompress)f(Corpus)773 -1639 y(Flag)238 b(usage)285 b(usage)332 b(-s)48 b(usage)237 -b(Size)821 1846 y(-1)286 b(1200k)332 b(500k)429 b(350k)285 -b(914704)821 1950 y(-2)h(2000k)332 b(900k)429 b(600k)285 -b(877703)821 2054 
y(-3)h(2800k)f(1300k)428 b(850k)285 -b(860338)821 2158 y(-4)h(3600k)f(1700k)380 b(1100k)285 -b(846899)821 2261 y(-5)h(4400k)f(2100k)380 b(1350k)285 -b(845160)821 2365 y(-6)h(5200k)f(2500k)380 b(1600k)285 -b(838626)821 2469 y(-7)h(6100k)f(2900k)380 b(1850k)285 -b(834096)821 2573 y(-8)h(6800k)f(3300k)380 b(2100k)285 -b(828642)821 2676 y(-9)h(7600k)f(3700k)380 b(2350k)285 -b(828642)390 2992 y Fe(RECO)m(VERING)38 b(D)m(A)-10 b(T)g(A)40 -b(FR)m(OM)h(D)m(AMA)m(GED)f(FILES)390 3179 y Fi(bzip2)24 -b Fj(compresses)i(\014les)g(in)f(blo)s(c)m(ks,)i(usually)e(900kb)m -(ytes)j(long.)40 b(Eac)m(h)25 b(blo)s(c)m(k)h(is)g(handled)390 -3283 y(indep)s(enden)m(tly)-8 b(.)42 b(If)31 b(a)g(media)g(or)h -(transmission)f(error)g(causes)g(a)g(m)m(ulti-blo)s(c)m(k)i -Fi(.bz2)d Fj(\014le)390 3387 y(to)35 b(b)s(ecome)f(damaged,)i(it)e(ma)m -(y)h(b)s(e)e(p)s(ossible)h(to)h(reco)m(v)m(er)h(data)f(from)f(the)g -(undamaged)390 3491 y(blo)s(c)m(ks)d(in)f(the)h(\014le.)390 -3642 y(The)g(compressed)g(represen)m(tation)i(of)f(eac)m(h)g(blo)s(c)m -(k)g(is)f(delimited)h(b)m(y)f(a)h(48-bit)g(pattern,)390 -3745 y(whic)m(h)27 b(mak)m(es)g(it)h(p)s(ossible)e(to)i(\014nd)d(the)j -(blo)s(c)m(k)f(b)s(oundaries)e(with)i(reasonable)h(certain)m(t)m(y)-8 -b(.)390 3849 y(Eac)m(h)33 b(blo)s(c)m(k)g(also)h(carries)g(its)f(o)m -(wn)g(32-bit)h(CR)m(C,)f(so)g(damaged)g(blo)s(c)m(ks)h(can)f(b)s(e)f -(distin-)390 3953 y(guished)e(from)g(undamaged)f(ones.)390 -4104 y Fi(bzip2recover)36 b Fj(is)j(a)g(simple)f(program)h(whose)g -(purp)s(ose)f(is)h(to)g(searc)m(h)h(for)f(blo)s(c)m(ks)g(in)390 -4208 y Fi(.bz2)33 b Fj(\014les,)j(and)e(write)h(eac)m(h)h(blo)s(c)m(k)f -(out)g(in)m(to)g(its)g(o)m(wn)g Fi(.bz2)e Fj(\014le.)54 -b(Y)-8 b(ou)34 b(can)h(then)f(use)390 4311 y Fi(bzip2)29 -b(-t)23 b Fj(to)h(test)h(the)f(in)m(tegrit)m(y)i(of)d(the)h(resulting)g -(\014les,)h(and)e(decompress)g(those)h(whic)m(h)390 4415 -y(are)31 b(undamaged.)390 4566 y Fi(bzip2recover)40 b -Fj(tak)m(es)45 
b(a)f(single)g(argumen)m(t,)j(the)d(name)g(of)f(the)h -(damaged)g(\014le,)j(and)390 4670 y(writes)33 b(a)f(n)m(um)m(b)s(er)f -(of)h(\014les)h Fi(rec00001file.bz2)p Fj(,)28 b Fi(rec00002file.bz2)p -Fj(,)g(etc,)34 b(con)m(tain-)390 4774 y(ing)29 b(the)g(extracted)i(blo) -s(c)m(ks.)40 b(The)29 b(output)g(\014lenames)g(are)g(designed)g(so)g -(that)g(the)g(use)g(of)390 4878 y(wildcards)h(in)f(subsequen)m(t)h(pro) -s(cessing)g({)h(for)f(example,)h Fi(bzip2)e(-dc)g(rec*file.bz2)e(>)390 -4981 y(recovered_data)f Fj({)31 b(pro)s(cesses)g(the)f(\014les)h(in)f -(the)h(correct)h(order.)390 5132 y Fi(bzip2recover)37 -b Fj(should)i(b)s(e)h(of)h(most)f(use)g(dealing)h(with)f(large)i -Fi(.bz2)d Fj(\014les,)k(as)d(these)390 5236 y(will)31 -b(con)m(tain)i(man)m(y)e(blo)s(c)m(ks.)43 b(It)32 b(is)f(clearly)h -(futile)g(to)g(use)f(it)g(on)g(damaged)g(single-blo)s(c)m(k)390 -5340 y(\014les,)g(since)g(a)g(damaged)f(blo)s(c)m(k)h(cannot)h(b)s(e)e -(reco)m(v)m(ered.)43 b(If)31 b(y)m(ou)g(wish)f(to)h(minimise)g(an)m(y)p -eop -%%Page: 8 9 -8 8 bop 150 -116 a Fl(Chapter)30 b(2:)41 b(Ho)m(w)31 -b(to)g(use)f Fi(bzip2)2375 b Fl(8)390 299 y Fj(p)s(oten)m(tial)36 -b(data)f(loss)f(through)g(media)h(or)g(transmission)f(errors,)i(y)m(ou) -e(migh)m(t)i(consider)390 403 y(compressing)31 b(with)f(a)g(smaller)h -(blo)s(c)m(k)g(size.)390 711 y Fe(PERF)m(ORMANCE)39 b(NOTES)390 -897 y Fj(The)c(sorting)i(phase)e(of)g(compression)h(gathers)g(together) -i(similar)e(strings)g(in)f(the)h(\014le.)390 1001 y(Because)51 -b(of)g(this,)57 b(\014les)51 b(con)m(taining)h(v)m(ery)f(long)h(runs)d -(of)j(rep)s(eated)f(sym)m(b)s(ols,)56 b(lik)m(e)390 1105 -y Fi(")p Fj(aabaabaabaab)c(...)p Fi(")h Fj(\(rep)s(eated)g(sev)m(eral)h -(h)m(undred)c(times\))k(ma)m(y)f(compress)f(more)390 -1209 y(slo)m(wly)33 b(than)f(normal.)45 b(V)-8 b(ersions)33 -b(0.9.5)g(and)e(ab)s(o)m(v)m(e)i(fare)g(m)m(uc)m(h)f(b)s(etter)g(than)g -(previous)390 1312 y(v)m(ersions)54 b(in)g(this)g(resp)s(ect.)111 -b(The)54 
b(ratio)h(b)s(et)m(w)m(een)g(w)m(orst-case)g(and)e(a)m(v)m -(erage-case)390 1416 y(compression)39 b(time)g(is)g(in)f(the)h(region)h -(of)e(10:1.)67 b(F)-8 b(or)40 b(previous)e(v)m(ersions,)k(this)c -(\014gure)390 1520 y(w)m(as)j(more)g(lik)m(e)g(100:1.)74 -b(Y)-8 b(ou)40 b(can)h(use)f(the)h Fi(-vvvv)e Fj(option)i(to)g(monitor) -g(progress)g(in)390 1624 y(great)32 b(detail,)f(if)g(y)m(ou)f(w)m(an)m -(t.)390 1775 y(Decompression)h(sp)s(eed)e(is)i(una\013ected)g(b)m(y)f -(these)h(phenomena.)390 1926 y Fi(bzip2)g Fj(usually)i(allo)s(cates)i -(sev)m(eral)f(megab)m(ytes)g(of)f(memory)g(to)h(op)s(erate)g(in,)f(and) -f(then)390 2030 y(c)m(harges)37 b(all)f(o)m(v)m(er)h(it)f(in)f(a)h -(fairly)g(random)f(fashion.)55 b(This)36 b(means)f(that)h(p)s -(erformance,)390 2133 y(b)s(oth)f(for)g(compressing)h(and)e -(decompressing,)j(is)f(largely)g(determined)g(b)m(y)f(the)h(sp)s(eed) -390 2237 y(at)27 b(whic)m(h)e(y)m(our)h(mac)m(hine)h(can)f(service)h -(cac)m(he)g(misses.)39 b(Because)27 b(of)f(this,)h(small)f(c)m(hanges) -390 2341 y(to)31 b(the)f(co)s(de)h(to)g(reduce)f(the)h(miss)e(rate)i -(ha)m(v)m(e)g(b)s(een)f(observ)m(ed)h(to)g(giv)m(e)g(disprop)s(ortion-) -390 2445 y(ately)i(large)h(p)s(erformance)e(impro)m(v)m(emen)m(ts.)47 -b(I)33 b(imagine)g Fi(bzip2)d Fj(will)j(p)s(erform)e(b)s(est)h(on)390 -2548 y(mac)m(hines)f(with)f(v)m(ery)h(large)g(cac)m(hes.)390 -2856 y Fe(CA)-14 b(VEA)k(TS)390 3043 y Fj(I/O)45 b(error)f(messages)h -(are)f(not)h(as)f(helpful)f(as)h(they)g(could)g(b)s(e.)81 -b Fi(bzip2)43 b Fj(tries)i(hard)390 3147 y(to)37 b(detect)g(I/O)f -(errors)h(and)d(exit)j(cleanly)-8 b(,)39 b(but)c(the)h(details)h(of)f -(what)f(the)h(problem)g(is)390 3251 y(sometimes)c(seem)e(rather)h -(misleading.)390 3402 y(This)c(man)m(ual)f(page)i(p)s(ertains)e(to)i(v) -m(ersion)g(1.0.2)h(of)e Fi(bzip2)p Fj(.)38 b(Compressed)26 -b(data)i(created)390 3505 y(b)m(y)22 b(this)h(v)m(ersion)g(is)f(en)m -(tirely)i(forw)m(ards)f(and)e(bac)m(kw)m(ards)i(compatible)g(with)f -(the)h(previous)390 
3609 y(public)g(releases,)k(v)m(ersions)d(0.1pl2,)i -(0.9.0,)h(0.9.5,)g(1.0.0)f(and)d(1.0.1,)k(but)c(with)g(the)h(follo)m -(w-)390 3713 y(ing)h(exception:)39 b(0.9.0)27 b(and)c(ab)s(o)m(v)m(e)j -(can)f(correctly)i(decompress)d(m)m(ultiple)i(concatenated)390 -3817 y(compressed)36 b(\014les.)59 b(0.1pl2)37 b(cannot)g(do)f(this;)k -(it)d(will)f(stop)h(after)g(decompressing)f(just)390 -3921 y(the)31 b(\014rst)f(\014le)g(in)g(the)h(stream.)390 -4072 y Fi(bzip2recover)19 b Fj(v)m(ersions)j(prior)g(to)h(this)f(one,)i -(1.0.2,)h(used)c(32-bit)j(in)m(tegers)f(to)g(represen)m(t)390 -4175 y(bit)32 b(p)s(ositions)g(in)f(compressed)h(\014les,)g(so)g(it)g -(could)g(not)g(handle)f(compressed)h(\014les)f(more)390 -4279 y(than)39 b(512)h(megab)m(ytes)h(long.)68 b(V)-8 -b(ersion)39 b(1.0.2)i(and)d(ab)s(o)m(v)m(e)j(uses)d(64-bit)j(in)m(ts)e -(on)h(some)390 4383 y(platforms)e(whic)m(h)g(supp)s(ort)f(them)i(\(GNU) -g(supp)s(orted)e(targets,)42 b(and)c(Windo)m(ws\).)64 -b(T)-8 b(o)390 4487 y(establish)27 b(whether)g(or)f(not)i -Fi(bzip2recover)23 b Fj(w)m(as)k(built)f(with)h(suc)m(h)f(a)h -(limitation,)j(run)25 b(it)390 4591 y(without)33 b(argumen)m(ts.)50 -b(In)33 b(an)m(y)g(ev)m(en)m(t)i(y)m(ou)e(can)h(build)e(y)m(ourself)h -(an)g(unlimited)g(v)m(ersion)390 4694 y(if)28 b(y)m(ou)h(can)g -(recompile)h(it)f(with)f Fi(MaybeUInt64)e Fj(set)j(to)g(b)s(e)f(an)g -(unsigned)g(64-bit)i(in)m(teger.)390 5002 y Fe(A)m(UTHOR)390 -5189 y Fj(Julian)g(Sew)m(ard,)g Fi(jseward@acm.org)p -Fj(.)390 5340 y Fi(http://sources.redhat.co)o(m/bz)o(ip2)p -eop -%%Page: 9 10 -9 9 bop 150 -116 a Fl(Chapter)30 b(2:)41 b(Ho)m(w)31 -b(to)g(use)f Fi(bzip2)2375 b Fl(9)390 299 y Fj(The)22 -b(ideas)h(em)m(b)s(o)s(died)f(in)g Fi(bzip2)f Fj(are)i(due)e(to)j(\(at) -f(least\))h(the)f(follo)m(wing)g(p)s(eople:)37 b(Mic)m(hael)390 -403 y(Burro)m(ws)45 b(and)g(Da)m(vid)h(Wheeler)g(\(for)g(the)g(blo)s(c) -m(k)h(sorting)f(transformation\),)51 b(Da)m(vid)390 506 -y(Wheeler)42 b(\(again,)j(for)c(the)h(Hu\013man)e(co)s(der\),)45 
-b(P)m(eter)e(F)-8 b(en)m(wic)m(k)43 b(\(for)e(the)h(structured)390 -610 y(co)s(ding)34 b(mo)s(del)h(in)f(the)g(original)i -Fi(bzip)p Fj(,)e(and)g(man)m(y)g(re\014nemen)m(ts\),)i(and)e(Alistair)h -(Mof-)390 714 y(fat,)45 b(Radford)d(Neal)g(and)f(Ian)h(Witten)h(\(for)f -(the)h(arithmetic)g(co)s(der)f(in)g(the)g(original)390 -818 y Fi(bzip)p Fj(\).)d(I)26 b(am)h(m)m(uc)m(h)g(indebted)f(for)h -(their)g(help,)g(supp)s(ort)e(and)h(advice.)40 b(See)27 -b(the)g(man)m(ual)390 922 y(in)k(the)h(source)f(distribution)h(for)f(p) -s(oin)m(ters)h(to)g(sources)f(of)h(do)s(cumen)m(tation.)44 -b(Christian)390 1025 y(v)m(on)27 b(Ro)s(ques)f(encouraged)g(me)h(to)g -(lo)s(ok)g(for)f(faster)h(sorting)g(algorithms,)h(so)e(as)h(to)g(sp)s -(eed)390 1129 y(up)32 b(compression.)50 b(Bela)34 b(Lubkin)d -(encouraged)j(me)g(to)g(impro)m(v)m(e)g(the)g(w)m(orst-case)h(com-)390 -1233 y(pression)e(p)s(erformance.)51 b(The)34 b Fi(bz*)f -Fj(scripts)h(are)g(deriv)m(ed)g(from)g(those)g(of)g(GNU)h -Fi(gzip)p Fj(.)390 1337 y(Man)m(y)40 b(p)s(eople)f(sen)m(t)h(patc)m -(hes,)i(help)s(ed)c(with)h(p)s(ortabilit)m(y)i(problems,)g(len)m(t)f -(mac)m(hines,)390 1440 y(ga)m(v)m(e)32 b(advice)f(and)f(w)m(ere)h -(generally)h(helpful.)p eop -%%Page: 10 11 -10 10 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(10)150 299 y Fg(3)80 b(Programming)53 -b(with)h Ff(libbzip2)150 568 y Fj(This)30 b(c)m(hapter)h(describ)s(es)f -(the)h(programming)f(in)m(terface)i(to)g Fi(libbzip2)p -Fj(.)150 725 y(F)-8 b(or)33 b(general)g(bac)m(kground)f(information,)h -(particularly)f(ab)s(out)g(memory)g(use)g(and)f(p)s(erformance)h(as-) -150 834 y(p)s(ects,)f(y)m(ou'd)f(b)s(e)g(w)m(ell)h(advised)f(to)i(read) -e(Chapter)g(2)h(as)f(w)m(ell.)150 1124 y Fk(3.1)68 b(T)-11 -b(op-lev)l(el)46 b(structure)150 1316 y Fi(libbzip2)32 -b Fj(is)i(a)g(\015exible)h(library)f(for)g(compressing)g(and)g -(decompressing)g(data)h(in)e(the)i Fi(bzip2)e Fj(data)150 -1426 y(format.)38 b(Although)22 b(pac)m(k)-5 b(aged)24 
-b(as)e(a)g(single)h(en)m(tit)m(y)-8 b(,)26 b(it)d(helps)f(to)h(regard)f -(the)h(library)f(as)g(three)h(separate)150 1535 y(parts:)41 -b(the)30 b(lo)m(w)h(lev)m(el)i(in)m(terface,)f(and)d(the)i(high)f(lev)m -(el)i(in)m(terface,)h(and)c(some)i(utilit)m(y)h(functions.)150 -1692 y(The)37 b(structure)g(of)g Fi(libbzip2)p Fj('s)e(in)m(terfaces)j -(is)f(similar)g(to)h(that)f(of)g(Jean-loup)g(Gailly's)i(and)d(Mark)150 -1802 y(Adler's)30 b(excellen)m(t)j Fi(zlib)c Fj(library)-8 -b(.)150 1959 y(All)29 b(externally)h(visible)g(sym)m(b)s(ols)e(ha)m(v)m -(e)i(names)f(b)s(eginning)f Fi(BZ2_)p Fj(.)39 b(This)29 -b(is)g(new)f(in)h(v)m(ersion)h(1.0.)41 b(The)150 2068 -y(in)m(ten)m(tion)32 b(is)f(to)g(minimise)f(p)s(ollution)h(of)f(the)h -(namespaces)f(of)h(library)f(clien)m(ts.)150 2321 y Fe(3.1.1)63 -b(Lo)m(w-lev)m(el)39 b(summary)150 2514 y Fj(This)52 -b(in)m(terface)i(pro)m(vides)e(services)h(for)f(compressing)g(and)g -(decompressing)g(data)g(in)g(memory)-8 b(.)150 2623 y(There's)42 -b(no)g(pro)m(vision)g(for)g(dealing)g(with)f(\014les,)k(streams)d(or)g -(an)m(y)g(other)g(I/O)h(mec)m(hanisms,)h(just)150 2733 -y(straigh)m(t)51 b(memory-to-memory)g(w)m(ork.)98 b(In)48 -b(fact,)55 b(this)49 b(part)h(of)f(the)h(library)f(can)g(b)s(e)f -(compiled)150 2843 y(without)31 b(inclusion)f(of)g Fi(stdio.h)p -Fj(,)f(whic)m(h)h(ma)m(y)h(b)s(e)f(helpful)f(for)i(em)m(b)s(edded)e -(applications.)150 2999 y(The)h(lo)m(w-lev)m(el)k(part)c(of)h(the)f -(library)g(has)g(no)h(global)g(v)-5 b(ariables)31 b(and)e(is)i -(therefore)g(thread-safe.)150 3156 y(Six)26 b(routines)h(mak)m(e)h(up)e -(the)h(lo)m(w)g(lev)m(el)i(in)m(terface:)41 b Fi(BZ2_bzCompressInit)p -Fj(,)22 b Fi(BZ2_bzCompress)p Fj(,)i(and)150 3266 y Fi -(BZ2_bzCompressEnd)i Fj(for)k(compression,)h(and)f(a)g(corresp)s -(onding)g(trio)h Fi(BZ2_bzDecompressInit)p Fj(,)150 3375 -y Fi(BZ2_bzDecompress)36 b Fj(and)j Fi(BZ2_bzDecompressEnd)c -Fj(for)41 b(decompression.)70 b(The)40 b Fi(*Init)f Fj(functions)150 -3485 
y(allo)s(cate)k(memory)e(for)g(compression/decompression)h(and)e -(do)h(other)h(initialisations,)k(whilst)41 b(the)150 -3595 y Fi(*End)29 b Fj(functions)h(close)i(do)m(wn)e(op)s(erations)h -(and)e(release)j(memory)-8 b(.)150 3751 y(The)35 b(real)h(w)m(ork)f(is) -g(done)g(b)m(y)g Fi(BZ2_bzCompress)d Fj(and)i Fi(BZ2_bzDecompress)p -Fj(.)50 b(These)36 b(compress)f(and)150 3861 y(decompress)28 -b(data)g(from)f(a)h(user-supplied)e(input)h(bu\013er)g(to)h(a)g -(user-supplied)e(output)i(bu\013er.)39 b(These)150 3971 -y(bu\013ers)31 b(can)g(b)s(e)g(an)m(y)h(size;)h(arbitrary)f(quan)m -(tities)h(of)f(data)g(are)g(handled)e(b)m(y)i(making)f(rep)s(eated)h -(calls)150 4080 y(to)g(these)g(functions.)43 b(This)31 -b(is)g(a)h(\015exible)f(mec)m(hanism)h(allo)m(wing)g(a)g(consumer-pull) -f(st)m(yle)h(of)g(activit)m(y)-8 b(,)150 4190 y(or)31 -b(pro)s(ducer-push,)d(or)j(a)f(mixture)h(of)f(b)s(oth.)150 -4443 y Fe(3.1.2)63 b(High-lev)m(el)41 b(summary)150 4635 -y Fj(This)c(in)m(terface)i(pro)m(vides)e(some)h(handy)e(wrapp)s(ers)f -(around)i(the)g(lo)m(w-lev)m(el)j(in)m(terface)f(to)f(facilitate)150 -4745 y(reading)25 b(and)f(writing)h Fi(bzip2)f Fj(format)h(\014les)g -(\()p Fi(.bz2)f Fj(\014les\).)39 b(The)25 b(routines)h(pro)m(vide)f(ho) -s(oks)g(to)g(facilitate)150 4854 y(reading)42 b(\014les)f(in)g(whic)m -(h)h(the)g Fi(bzip2)e Fj(data)i(stream)g(is)f(em)m(b)s(edded)g(within)g -(some)h(larger-scale)i(\014le)150 4964 y(structure,)31 -b(or)g(where)f(there)h(are)g(m)m(ultiple)g Fi(bzip2)e -Fj(data)h(streams)h(concatenated)i(end-to-end.)150 5121 -y(F)-8 b(or)31 b(reading)g(\014les,)f Fi(BZ2_bzReadOpen)p -Fj(,)d Fi(BZ2_bzRead)p Fj(,)h Fi(BZ2_bzReadClose)e Fj(and)150 -5230 y Fi(BZ2_bzReadGetUnused)18 b Fj(are)23 b(supplied.)37 -b(F)-8 b(or)24 b(writing)f(\014les,)i Fi(BZ2_bzWriteOpen)p -Fj(,)20 b Fi(BZ2_bzWrite)g Fj(and)150 5340 y Fi(BZ2_bzWriteFinish)26 -b Fj(are)k(a)m(v)-5 b(ailable.)p eop -%%Page: 11 12 -11 11 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g 
Fi(libbzip2)1881 b Fl(11)150 299 y Fj(As)32 -b(with)h(the)g(lo)m(w-lev)m(el)j(library)-8 b(,)33 b(no)g(global)h(v)-5 -b(ariables)33 b(are)g(used)f(so)h(the)g(library)g(is)g(p)s(er)f(se)h -(thread-)150 408 y(safe.)62 b(Ho)m(w)m(ev)m(er,)41 b(if)c(I/O)h(errors) -g(o)s(ccur)f(whilst)g(reading)h(or)f(writing)h(the)f(underlying)g -(compressed)150 518 y(\014les,)j(y)m(ou)e(ma)m(y)h(ha)m(v)m(e)f(to)h -(consult)g Fi(errno)d Fj(to)j(determine)f(the)h(cause)f(of)g(the)g -(error.)64 b(In)37 b(that)i(case,)150 628 y(y)m(ou'd)30 -b(need)h(a)f(C)g(library)g(whic)m(h)g(correctly)j(supp)s(orts)c -Fi(errno)g Fj(in)h(a)g(m)m(ultithreaded)h(en)m(vironmen)m(t.)150 -784 y(T)-8 b(o)77 b(mak)m(e)f(the)g(library)g(a)f(little)j(simpler)e -(and)e(more)j(p)s(ortable,)87 b Fi(BZ2_bzReadOpen)72 -b Fj(and)150 894 y Fi(BZ2_bzWriteOpen)34 b Fj(require)39 -b(y)m(ou)g(to)g(pass)f(them)h(\014le)f(handles)g(\()p -Fi(FILE*)p Fj(s\))f(whic)m(h)h(ha)m(v)m(e)i(previously)150 -1004 y(b)s(een)56 b(op)s(ened)g(for)h(reading)f(or)h(writing)g(resp)s -(ectiv)m(ely)-8 b(.)122 b(That)57 b(a)m(v)m(oids)h(p)s(ortabilit)m(y)f -(problems)150 1113 y(asso)s(ciated)33 b(with)g(\014le)f(op)s(erations)h -(and)e(\014le)i(attributes,)h(whilst)e(not)h(b)s(eing)f(m)m(uc)m(h)g -(of)h(an)f(imp)s(osition)150 1223 y(on)e(the)h(programmer.)150 -1474 y Fe(3.1.3)63 b(Utilit)m(y)40 b(functions)h(summary)150 -1666 y Fj(F)-8 b(or)43 b(v)m(ery)h(simple)e(needs,)k -Fi(BZ2_bzBuffToBuffCompres)o(s)36 b Fj(and)42 b Fi -(BZ2_bzBuffToBuffDecompres)o(s)150 1776 y Fj(are)26 b(pro)m(vided.)39 -b(These)26 b(compress)f(data)h(in)f(memory)h(from)f(one)h(bu\013er)f -(to)h(another)g(bu\013er)f(in)g(a)g(single)150 1885 y(function)37 -b(call.)62 b(Y)-8 b(ou)38 b(should)e(assess)h(whether)g(these)h -(functions)f(ful\014ll)f(y)m(our)i(memory-to-memory)150 -1995 y(compression/decompression)22 b(requiremen)m(ts)g(b)s(efore)f(in) -m(v)m(esting)i(e\013ort)f(in)f(understanding)f(the)h(more)150 -2105 y(general)31 
b(but)f(more)h(complex)g(lo)m(w-lev)m(el)i(in)m -(terface.)150 2261 y(Y)-8 b(oshiok)j(a)47 b(Tsuneo)g(\()p -Fi(QWF00133@niftyserve.or.jp)40 b Fj(/)47 b Fi -(tsuneo-y@is.aist-nara.ac)o(.jp)p Fj(\))40 b(has)150 -2371 y(con)m(tributed)f(some)f(functions)g(to)h(giv)m(e)h(b)s(etter)e -Fi(zlib)f Fj(compatibilit)m(y)-8 b(.)67 b(These)38 b(functions)g(are)g -Fi(BZ2_)150 2481 y(bzopen)p Fj(,)g Fi(BZ2_bzread)p Fj(,)f -Fi(BZ2_bzwrite)p Fj(,)g Fi(BZ2_bzflush)p Fj(,)g Fi(BZ2_bzclose)p -Fj(,)g Fi(BZ2_bzerror)e Fj(and)i Fi(BZ2_)150 2590 y(bzlibVersion)p -Fj(.)44 b(Y)-8 b(ou)33 b(ma)m(y)g(\014nd)e(these)i(functions)g(more)g -(con)m(v)m(enien)m(t)i(for)e(simple)f(\014le)h(reading)g(and)150 -2700 y(writing,)d(than)g(those)g(in)f(the)h(high-lev)m(el)i(in)m -(terface.)42 b(These)30 b(functions)f(are)h(not)g(\(y)m(et\))i -(o\016cially)f(part)150 2809 y(of)h(the)g(library)-8 -b(,)33 b(and)e(are)h(minimally)g(do)s(cumen)m(ted)g(here.)45 -b(If)31 b(they)h(break,)h(y)m(ou)f(get)h(to)g(k)m(eep)f(all)h(the)150 -2919 y(pieces.)42 b(I)30 b(hop)s(e)g(to)h(do)s(cumen)m(t)f(them)h(prop) -s(erly)e(when)h(time)h(p)s(ermits.)150 3076 y(Y)-8 b(oshiok)j(a)27 -b(also)g(con)m(tributed)g(mo)s(di\014cations)g(to)g(allo)m(w)h(the)e -(library)h(to)g(b)s(e)f(built)g(as)g(a)h(Windo)m(ws)f(DLL.)150 -3362 y Fk(3.2)68 b(Error)45 b(handling)150 3554 y Fj(The)22 -b(library)g(is)g(designed)g(to)i(reco)m(v)m(er)g(cleanly)f(in)f(all)h -(situations,)i(including)c(the)i(w)m(orst-case)h(situation)150 -3664 y(of)k(decompressing)g(random)f(data.)40 b(I'm)27 -b(not)h(100\045)h(sure)e(that)i(it)f(can)g(alw)m(a)m(ys)g(do)g(this,)g -(so)g(y)m(ou)g(migh)m(t)150 3774 y(w)m(an)m(t)i(to)g(add)e(a)i(signal)f -(handler)g(to)h(catc)m(h)g(segmen)m(tation)i(violations)f(during)d -(decompression)h(if)h(y)m(ou)150 3883 y(are)c(feeling)g(esp)s(ecially)g -(paranoid.)39 b(I)25 b(w)m(ould)g(b)s(e)g(in)m(terested)i(in)e(hearing) -g(more)h(ab)s(out)f(the)h(robustness)150 3993 y(of)31 -b(the)f(library)h(to)g(corrupted)f(compressed)g(data.)150 
-4150 y(V)-8 b(ersion)38 b(1.0)h(is)e(m)m(uc)m(h)h(more)g(robust)f(in)g -(this)h(resp)s(ect)g(than)f(0.9.0)i(or)f(0.9.5.)64 b(In)m(v)m -(estigations)40 b(with)150 4259 y(Chec)m(k)m(er)30 b(\(a)f(to)s(ol)h -(for)f(detecting)h(problems)f(with)f(memory)h(managemen)m(t,)i(similar) -e(to)h(Purify\))e(indi-)150 4369 y(cate)34 b(that,)g(at)f(least)g(for)g -(the)g(few)f(\014les)g(I)h(tested,)h(all)f(single-bit)h(errors)e(in)g -(the)h(decompressed)g(data)150 4478 y(are)28 b(caugh)m(t)h(prop)s(erly) --8 b(,)29 b(with)f(no)g(segmen)m(tation)i(faults,)f(no)f(reads)g(of)g -(uninitialised)g(data)g(and)g(no)g(out)150 4588 y(of)j(range)h(reads)f -(or)h(writes.)44 b(So)31 b(it's)h(certainly)h(m)m(uc)m(h)e(impro)m(v)m -(ed,)i(although)e(I)h(w)m(ouldn't)f(claim)h(it)g(to)150 -4698 y(b)s(e)e(totally)i(b)s(om)m(bpro)s(of.)150 4854 -y(The)24 b(\014le)g Fi(bzlib.h)e Fj(con)m(tains)j(all)f(de\014nitions)g -(needed)g(to)g(use)g(the)g(library)-8 b(.)39 b(In)24 -b(particular,)i(y)m(ou)e(should)150 4964 y(de\014nitely)31 -b(not)f(include)h Fi(bzlib_private.h)p Fj(.)150 5121 -y(In)38 b Fi(bzlib.h)p Fj(,)g(the)g(v)-5 b(arious)38 -b(return)g(v)-5 b(alues)38 b(are)g(de\014ned.)62 b(The)38 -b(follo)m(wing)i(list)f(is)f(not)g(in)m(tended)g(as)150 -5230 y(an)33 b(exhaustiv)m(e)h(description)g(of)g(the)g(circumstances)g -(in)f(whic)m(h)g(a)h(giv)m(en)g(v)-5 b(alue)34 b(ma)m(y)g(b)s(e)f -(returned)g({)150 5340 y(those)h(descriptions)f(are)h(giv)m(en)g -(later.)50 b(Rather,)34 b(it)g(is)f(in)m(tended)g(to)h(con)m(v)m(ey)h -(the)e(rough)g(meaning)g(of)p eop -%%Page: 12 13 -12 12 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(12)150 299 y Fj(eac)m(h)35 -b(return)f(v)-5 b(alue.)53 b(The)34 b(\014rst)g(\014v)m(e)h(actions)g -(are)g(normal)f(and)f(not)i(in)m(tended)f(to)i(denote)f(an)e(error)150 -408 y(situation.)150 592 y Fi(BZ_OK)240 b Fj(The)30 b(requested)h -(action)h(w)m(as)e(completed)i(successfully)-8 b(.)150 -756 y Fi(BZ_RUN_OK)150 866 y(BZ_FLUSH_OK)150 975 
y(BZ_FINISH_OK)630 -1085 y Fj(In)22 b Fi(BZ2_bzCompress)p Fj(,)e(the)j(requested)g -(\015ush/\014nish/nothing-sp)s(ecial)e(action)j(w)m(as)e(com-)630 -1194 y(pleted)31 b(successfully)-8 b(.)150 1358 y Fi(BZ_STREAM_END)630 -1468 y Fj(Compression)38 b(of)g(data)h(w)m(as)f(completed,)j(or)e(the)f -(logical)j(stream)e(end)e(w)m(as)h(detected)630 1577 -y(during)29 b(decompression.)150 1761 y(The)h(follo)m(wing)i(return)e -(v)-5 b(alues)31 b(indicate)g(an)f(error)h(of)f(some)h(kind.)150 -1945 y Fi(BZ_CONFIG_ERROR)630 2055 y Fj(Indicates)46 -b(that)h(the)f(library)f(has)h(b)s(een)f(improp)s(erly)g(compiled)h(on) -g(y)m(our)g(platform)630 2164 y({)k(a)h(ma)5 b(jor)50 -b(con\014guration)h(error.)100 b(Sp)s(eci\014cally)-8 -b(,)56 b(it)51 b(means)e(that)i Fi(sizeof\(char\))p Fj(,)630 -2274 y Fi(sizeof\(short\))43 b Fj(and)i Fi(sizeof\(int\))e -Fj(are)k(not)g(1,)k(2)46 b(and)g(4)h(resp)s(ectiv)m(ely)-8 -b(,)52 b(as)47 b(they)630 2384 y(should)26 b(b)s(e.)38 -b(Note)29 b(that)e(the)g(library)f(should)g(still)h(w)m(ork)g(prop)s -(erly)f(on)h(64-bit)h(platforms)630 2493 y(whic)m(h)43 -b(follo)m(w)h(the)g(LP64)f(programming)h(mo)s(del)e({)i(that)g(is,)i -(where)d Fi(sizeof\(long\))630 2603 y Fj(and)32 b Fi(sizeof\(void*\))e -Fj(are)j(8.)50 b(Under)32 b(LP64,)j Fi(sizeof\(int\))30 -b Fj(is)j(still)h(4,)h(so)e Fi(libbzip2)p Fj(,)630 2712 -y(whic)m(h)d(do)s(esn't)g(use)h(the)f Fi(long)f Fj(t)m(yp)s(e,)i(is)g -(OK.)150 2876 y Fi(BZ_SEQUENCE_ERROR)630 2986 y Fj(When)41 -b(using)g(the)h(library)-8 b(,)45 b(it)d(is)g(imp)s(ortan)m(t)g(to)g -(call)h(the)f(functions)f(in)g(the)h(correct)630 3095 -y(sequence)25 b(and)f(with)g(data)h(structures)g(\(bu\013ers)f(etc\))i -(in)f(the)g(correct)h(states.)40 b Fi(libbzip2)630 3205 -y Fj(c)m(hec)m(ks)25 b(as)e(m)m(uc)m(h)g(as)g(it)h(can)f(to)h(ensure)f -(this)g(is)g(happ)s(ening,)g(and)g(returns)f Fi(BZ_SEQUENCE_)630 -3314 y(ERROR)35 b Fj(if)i(not.)59 b(Co)s(de)36 b(whic)m(h)h(complies)g -(precisely)h(with)e(the)h(function)f(seman)m(tics,)k(as)630 
-3424 y(detailed)c(b)s(elo)m(w,)h(should)e(nev)m(er)g(receiv)m(e)j(this) -d(v)-5 b(alue;)38 b(suc)m(h)d(an)g(ev)m(en)m(t)i(denotes)f(buggy)630 -3534 y(co)s(de)31 b(whic)m(h)f(y)m(ou)g(should)g(in)m(v)m(estigate.)150 -3697 y Fi(BZ_PARAM_ERROR)630 3807 y Fj(Returned)41 b(when)f(a)h -(parameter)h(to)g(a)g(function)f(call)h(is)f(out)h(of)f(range)h(or)f -(otherwise)630 3917 y(manifestly)35 b(incorrect.)55 b(As)34 -b(with)g Fi(BZ_SEQUENCE_ERROR)p Fj(,)d(this)k(denotes)g(a)f(bug)g(in)h -(the)630 4026 y(clien)m(t)24 b(co)s(de.)38 b(The)22 b(distinction)h(b)s -(et)m(w)m(een)f Fi(BZ_PARAM_ERROR)d Fj(and)i Fi(BZ_SEQUENCE_ERROR)630 -4136 y Fj(is)30 b(a)h(bit)f(hazy)-8 b(,)32 b(but)d(still)j(w)m(orth)e -(making.)150 4300 y Fi(BZ_MEM_ERROR)630 4409 y Fj(Returned)44 -b(when)g(a)h(request)g(to)g(allo)s(cate)i(memory)e(failed.)83 -b(Note)46 b(that)g(the)f(quan-)630 4519 y(tit)m(y)d(of)f(memory)f -(needed)g(to)i(decompress)e(a)g(stream)h(cannot)g(b)s(e)f(determined)h -(un)m(til)630 4628 y(the)f(stream's)h(header)e(has)h(b)s(een)f(read.)69 -b(So)39 b Fi(BZ2_bzDecompress)d Fj(and)j Fi(BZ2_bzRead)630 -4738 y Fj(ma)m(y)29 b(return)f Fi(BZ_MEM_ERROR)d Fj(ev)m(en)30 -b(though)e(some)h(of)g(the)g(compressed)g(data)g(has)f(b)s(een)630 -4847 y(read.)76 b(The)42 b(same)g(is)g(not)g(true)g(for)h(compression;) -48 b(once)43 b Fi(BZ2_bzCompressInit)37 b Fj(or)630 4957 -y Fi(BZ2_bzWriteOpen)26 b Fj(ha)m(v)m(e)32 b(successfully)e(completed,) -i Fi(BZ_MEM_ERROR)27 b Fj(cannot)k(o)s(ccur.)150 5121 -y Fi(BZ_DATA_ERROR)630 5230 y Fj(Returned)d(when)g(a)h(data)g(in)m -(tegrit)m(y)i(error)e(is)g(detected)h(during)d(decompression.)41 -b(Most)630 5340 y(imp)s(ortan)m(tly)-8 b(,)32 b(this)e(means)g(when)f -(stored)i(and)e(computed)h(CR)m(Cs)g(for)g(the)h(data)f(do)h(not)p -eop -%%Page: 13 14 -13 13 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(13)630 299 y Fj(matc)m(h.)41 -b(This)28 b(v)-5 b(alue)28 b(is)h(also)g(returned)e(up)s(on)g 
-(detection)j(of)f(an)m(y)f(other)h(anomaly)g(in)f(the)630 -408 y(compressed)i(data.)150 560 y Fi(BZ_DATA_ERROR_MAGIC)630 -670 y Fj(As)j(a)g(sp)s(ecial)g(case)h(of)f Fi(BZ_DATA_ERROR)p -Fj(,)d(it)k(is)f(sometimes)h(useful)f(to)h(kno)m(w)f(when)f(the)630 -779 y(compressed)c(stream)g(do)s(es)g(not)g(start)g(with)g(the)g -(correct)i(magic)e(b)m(ytes)h(\()p Fi('B')h('Z')f('h')p -Fj(\).)150 931 y Fi(BZ_IO_ERROR)630 1040 y Fj(Returned)h(b)m(y)h -Fi(BZ2_bzRead)d Fj(and)h Fi(BZ2_bzWrite)f Fj(when)i(there)h(is)g(an)f -(error)h(reading)g(or)630 1150 y(writing)e(in)f(the)h(compressed)g -(\014le,)g(and)f(b)m(y)h Fi(BZ2_bzReadOpen)24 b Fj(and)k -Fi(BZ2_bzWriteOpen)630 1259 y Fj(for)h(attempts)i(to)f(use)f(a)h -(\014le)f(for)h(whic)m(h)f(the)h(error)f(indicator)i(\(viz,)f -Fi(ferror\(f\))p Fj(\))e(is)h(set.)630 1369 y(On)h(receipt)i(of)f -Fi(BZ_IO_ERROR)p Fj(,)c(the)k(caller)h(should)e(consult)h -Fi(errno)e Fj(and/or)h Fi(perror)f Fj(to)630 1479 y(acquire)i(op)s -(erating-system)h(sp)s(eci\014c)e(information)h(ab)s(out)f(the)g -(problem.)150 1630 y Fi(BZ_UNEXPECTED_EOF)630 1740 y -Fj(Returned)j(b)m(y)g Fi(BZ2_bzRead)e Fj(when)h(the)i(compressed)f -(\014le)h(\014nishes)e(b)s(efore)h(the)h(logical)630 -1849 y(end)c(of)g(stream)h(is)g(detected.)150 2001 y -Fi(BZ_OUTBUFF_FULL)630 2110 y Fj(Returned)d(b)m(y)g Fi -(BZ2_bzBuffToBuffCompress)22 b Fj(and)27 b Fi -(BZ2_bzBuffToBuffDecompres)o(s)630 2220 y Fj(to)k(indicate)h(that)f -(the)f(output)g(data)h(will)g(not)g(\014t)f(in)m(to)h(the)g(output)f -(bu\013er)g(pro)m(vided.)150 2492 y Fk(3.3)68 b(Lo)l(w-lev)l(el)47 -b(in)l(terface)150 2766 y Fe(3.3.1)63 b Fd(BZ2_bzCompressInit)390 -2953 y Fi(typedef)533 3057 y(struct)46 b({)676 3161 y(char)h(*next_in;) -676 3264 y(unsigned)f(int)h(avail_in;)676 3368 y(unsigned)f(int)h -(total_in_lo32;)676 3472 y(unsigned)f(int)h(total_in_hi32;)676 -3680 y(char)g(*next_out;)676 3783 y(unsigned)f(int)h(avail_out;)676 -3887 y(unsigned)f(int)h(total_out_lo32;)676 3991 y(unsigned)f(int)h -(total_out_hi32;)676 
4198 y(void)g(*state;)676 4406 y(void)g -(*\(*bzalloc\)\(void)c(*,int,int\);)676 4510 y(void)k -(\(*bzfree\)\(void)d(*,void)i(*\);)676 4614 y(void)h(*opaque;)533 -4717 y(})533 4821 y(bz_stream;)390 5029 y(int)g(BZ2_bzCompressInit)c -(\()k(bz_stream)e(*strm,)1583 5132 y(int)i(blockSize100k,)1583 -5236 y(int)g(verbosity,)1583 5340 y(int)g(workFactor)e(\);)p -eop -%%Page: 14 15 -14 14 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(14)150 456 y Fj(Prepares)31 -b(for)g(compression.)42 b(The)30 b Fi(bz_stream)e Fj(structure)j(holds) -g(all)g(data)g(p)s(ertaining)g(to)g(the)g(com-)150 565 -y(pression)k(activit)m(y)-8 b(.)59 b(A)35 b Fi(bz_stream)d -Fj(structure)k(should)e(b)s(e)h(allo)s(cated)i(and)e(initialised)h -(prior)f(to)i(the)150 675 y(call.)65 b(The)38 b(\014elds)g(of)g -Fi(bz_stream)e Fj(comprise)i(the)h(en)m(tiret)m(y)h(of)f(the)f -(user-visible)h(data.)64 b Fi(state)37 b Fj(is)h(a)150 -784 y(p)s(oin)m(ter)31 b(to)g(the)g(priv)-5 b(ate)31 -b(data)f(structures)h(required)f(for)h(compression.)150 -941 y(Custom)36 b(memory)g(allo)s(cators)i(are)f(supp)s(orted,)f(via)h -(\014elds)e Fi(bzalloc)p Fj(,)h Fi(bzfree)p Fj(,)g(and)g -Fi(opaque)p Fj(.)56 b(The)150 1051 y(v)-5 b(alue)32 b -Fi(opaque)e Fj(is)i(passed)f(to)i(as)f(the)g(\014rst)g(argumen)m(t)g -(to)h(all)f(calls)h(to)g Fi(bzalloc)d Fj(and)h Fi(bzfree)p -Fj(,)g(but)g(is)150 1160 y(otherwise)c(ignored)g(b)m(y)g(the)g(library) --8 b(.)39 b(The)27 b(call)g Fi(bzalloc)i(\()h(opaque,)e(n,)i(m)g(\))c -Fj(is)h(exp)s(ected)g(to)g(return)150 1270 y(a)h(p)s(oin)m(ter)g -Fi(p)f Fj(to)i Fi(n)h(*)g(m)d Fj(b)m(ytes)h(of)g(memory)-8 -b(,)29 b(and)e Fi(bzfree)h(\()i(opaque,)f(p)h(\))d Fj(should)g(free)h -(that)g(memory)-8 b(.)150 1427 y(If)32 b(y)m(ou)h(don't)g(w)m(an)m(t)g -(to)h(use)e(a)h(custom)g(memory)g(allo)s(cator,)i(set)e -Fi(bzalloc)p Fj(,)f Fi(bzfree)e Fj(and)i Fi(opaque)f -Fj(to)150 1537 y Fi(NULL)p Fj(,)e(and)h(the)h(library)f(will)h(then)f -(use)g(the)h(standard)e Fi(malloc)p 
Fj(/)p Fi(free)f -Fj(routines.)150 1693 y(Before)37 b(calling)h Fi(BZ2_bzCompressInit)p -Fj(,)33 b(\014elds)k Fi(bzalloc)p Fj(,)f Fi(bzfree)f -Fj(and)h Fi(opaque)e Fj(should)i(b)s(e)g(\014lled)150 -1803 y(appropriately)-8 b(,)35 b(as)e(just)f(describ)s(ed.)48 -b(Up)s(on)31 b(return,)j(the)f(in)m(ternal)h(state)g(will)f(ha)m(v)m(e) -h(b)s(een)e(allo)s(cated)150 1913 y(and)43 b(initialised,)49 -b(and)43 b Fi(total_in_lo32)p Fj(,)h Fi(total_in_hi32)p -Fj(,)g Fi(total_out_lo32)c Fj(and)j Fi(total_out_)150 -2022 y(hi32)35 b Fj(will)i(ha)m(v)m(e)g(b)s(een)f(set)h(to)g(zero.)59 -b(These)37 b(four)f(\014elds)f(are)i(used)f(b)m(y)g(the)g(library)h(to) -g(inform)f(the)150 2132 y(caller)41 b(of)e(the)h(total)h(amoun)m(t)f -(of)f(data)h(passed)e(in)m(to)j(and)d(out)i(of)f(the)h(library)-8 -b(,)42 b(resp)s(ectiv)m(ely)-8 b(.)70 b(Y)-8 b(ou)150 -2241 y(should)34 b(not)h(try)h(to)g(c)m(hange)g(them.)55 -b(As)34 b(of)h(v)m(ersion)h(1.0,)i(64-bit)e(coun)m(ts)g(are)f(main)m -(tained,)i(ev)m(en)f(on)150 2351 y(32-bit)j(platforms,)g(using)e(the)h -Fi(_hi32)d Fj(\014elds)i(to)h(store)h(the)e(upp)s(er)f(32)i(bits)f(of)h -(the)f(coun)m(t.)63 b(So,)39 b(for)150 2460 y(example,)31 -b(the)g(total)h(amoun)m(t)f(of)f(data)h(in)f(is)g Fi(\(total_in_hi32)d -(<<)j(32\))f(+)h(total_in_lo32)p Fj(.)150 2617 y(P)m(arameter)37 -b Fi(blockSize100k)31 b Fj(sp)s(eci\014es)k(the)h(blo)s(c)m(k)g(size)g -(to)g(b)s(e)e(used)h(for)g(compression.)55 b(It)36 b(should)150 -2727 y(b)s(e)27 b(a)h(v)-5 b(alue)28 b(b)s(et)m(w)m(een)g(1)g(and)f(9)h -(inclusiv)m(e,)i(and)c(the)i(actual)h(blo)s(c)m(k)f(size)h(used)e(is)g -(100000)k(x)c(this)h(\014gure.)150 2836 y(9)j(giv)m(es)h(the)e(b)s(est) -g(compression)h(but)f(tak)m(es)h(most)g(memory)-8 b(.)150 -2993 y(P)m(arameter)28 b Fi(verbosity)c Fj(should)h(b)s(e)g(set)i(to)g -(a)g(n)m(um)m(b)s(er)e(b)s(et)m(w)m(een)i(0)f(and)g(4)g(inclusiv)m(e.) 
-41 b(0)26 b(is)g(silen)m(t,)j(and)150 3103 y(greater)d(n)m(um)m(b)s -(ers)e(giv)m(e)i(increasingly)g(v)m(erb)s(ose)f(monitoring/debugging)h -(output.)38 b(If)25 b(the)g(library)f(has)150 3212 y(b)s(een)30 -b(compiled)h(with)f Fi(-DBZ_NO_STDIO)p Fj(,)d(no)j(suc)m(h)g(output)g -(will)h(app)s(ear)f(for)g(an)m(y)g(v)m(erb)s(osit)m(y)i(setting.)150 -3369 y(P)m(arameter)g Fi(workFactor)c Fj(con)m(trols)k(ho)m(w)f(the)g -(compression)g(phase)f(b)s(eha)m(v)m(es)h(when)e(presen)m(ted)i(with) -150 3479 y(w)m(orst)38 b(case,)j(highly)c(rep)s(etitiv)m(e,)42 -b(input)37 b(data.)62 b(If)38 b(compression)g(runs)e(in)m(to)i -(di\016culties)h(caused)e(b)m(y)150 3588 y(rep)s(etitiv)m(e)46 -b(data,)i(the)c(library)f(switc)m(hes)i(from)f(the)g(standard)f -(sorting)h(algorithm)h(to)g(a)f(fallbac)m(k)150 3698 -y(algorithm.)c(The)27 b(fallbac)m(k)g(is)f(slo)m(w)m(er)i(than)d(the)i -(standard)e(algorithm)j(b)m(y)e(p)s(erhaps)e(a)i(factor)i(of)e(three,) -150 3808 y(but)k(alw)m(a)m(ys)h(b)s(eha)m(v)m(es)g(reasonably)-8 -b(,)31 b(no)f(matter)i(ho)m(w)e(bad)g(the)g(input.)150 -3965 y(Lo)m(w)m(er)23 b(v)-5 b(alues)23 b(of)g Fi(workFactor)c -Fj(reduce)k(the)g(amoun)m(t)g(of)f(e\013ort)i(the)f(standard)f -(algorithm)h(will)g(exp)s(end)150 4074 y(b)s(efore)h(resorting)g(to)h -(the)f(fallbac)m(k.)39 b(Y)-8 b(ou)24 b(should)f(set)h(this)g -(parameter)g(carefully;)j(to)s(o)d(lo)m(w,)i(and)d(man)m(y)150 -4184 y(inputs)32 b(will)h(b)s(e)f(handled)g(b)m(y)h(the)g(fallbac)m(k)g -(algorithm)h(and)e(so)h(compress)g(rather)g(slo)m(wly)-8 -b(,)35 b(to)s(o)f(high,)150 4293 y(and)41 b(y)m(our)g(a)m(v)m -(erage-to-w)m(orst)46 b(case)d(compression)e(times)h(can)g(b)s(ecome)g -(v)m(ery)g(large.)74 b(The)42 b(default)150 4403 y(v)-5 -b(alue)31 b(of)f(30)h(giv)m(es)h(reasonable)f(b)s(eha)m(viour)f(o)m(v)m -(er)i(a)f(wide)f(range)h(of)f(circumstances.)150 4560 -y(Allo)m(w)m(able)j(v)-5 b(alues)31 b(range)g(from)g(0)g(to)h(250)g -(inclusiv)m(e.)44 b(0)31 b(is)g(a)g(sp)s(ecial)h(case,)g(equiv)-5 -b(alen)m(t)32 
b(to)g(using)f(the)150 4669 y(default)f(v)-5 -b(alue)31 b(of)g(30.)150 4826 y(Note)k(that)e(the)h(compressed)f -(output)g(generated)h(is)f(the)h(same)f(regardless)h(of)f(whether)g(or) -g(not)h(the)150 4936 y(fallbac)m(k)d(algorithm)h(is)e(used.)150 -5093 y(Be)f(a)m(w)m(are)g(also)h(that)f(this)f(parameter)i(ma)m(y)f -(disapp)s(ear)e(en)m(tirely)j(in)e(future)g(v)m(ersions)i(of)e(the)h -(library)-8 b(.)150 5202 y(In)43 b(principle)h(it)g(should)f(b)s(e)g(p) -s(ossible)h(to)g(devise)h(a)f(go)s(o)s(d)f(w)m(a)m(y)i(to)f -(automatically)i(c)m(ho)s(ose)f(whic)m(h)150 5312 y(algorithm)32 -b(to)f(use.)40 b(Suc)m(h)30 b(a)g(mec)m(hanism)h(w)m(ould)f(render)g -(the)h(parameter)g(obsolete.)p eop -%%Page: 15 16 -15 15 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(15)150 299 y Fj(P)m(ossible)32 -b(return)e(v)-5 b(alues:)572 450 y Fi(BZ_CONFIG_ERROR)663 -554 y Fj(if)30 b(the)h(library)f(has)g(b)s(een)g(mis-compiled)572 -657 y Fi(BZ_PARAM_ERROR)663 761 y Fj(if)g Fi(strm)f Fj(is)i -Fi(NULL)663 865 y Fj(or)f Fi(blockSize)e(<)i Fj(1)h(or)g -Fi(blockSize)c(>)k Fj(9)663 969 y(or)f Fi(verbosity)e(<)i -Fj(0)h(or)g Fi(verbosity)c(>)k Fj(4)663 1073 y(or)f Fi(workFactor)e(<)i -Fj(0)h(or)f Fi(workFactor)e(>)i Fj(250)572 1176 y Fi(BZ_MEM_ERROR)663 -1280 y Fj(if)g(not)h(enough)f(memory)h(is)f(a)m(v)-5 -b(ailable)572 1384 y Fi(BZ_OK)663 1488 y Fj(otherwise)150 -1645 y(Allo)m(w)m(able)32 b(next)f(actions:)572 1796 -y Fi(BZ2_bzCompress)663 1899 y Fj(if)f Fi(BZ_OK)f Fj(is)h(returned)572 -2003 y(no)g(sp)s(eci\014c)g(action)i(needed)e(in)g(case)h(of)g(error) -150 2255 y Fe(3.3.2)63 b Fd(BZ2_bzCompress)533 2441 y -Fi(int)47 b(BZ2_bzCompress)d(\()j(bz_stream)f(*strm,)g(int)h(action)f -(\);)150 2598 y Fj(Pro)m(vides)27 b(more)f(input)f(and/or)h(output)g -(bu\013er)f(space)h(for)g(the)h(library)-8 b(.)39 b(The)26 -b(caller)h(main)m(tains)g(input)150 2708 y(and)j(output)g(bu\013ers,)g -(and)f(calls)i Fi(BZ2_bzCompress)c Fj(to)k(transfer)f(data)h(b)s(et)m 
-(w)m(een)g(them.)150 2865 y(Before)h(eac)m(h)h(call)g(to)f -Fi(BZ2_bzCompress)p Fj(,)d Fi(next_in)h Fj(should)g(p)s(oin)m(t)i(at)h -(the)f(data)g(to)h(b)s(e)e(compressed,)150 2974 y(and)40 -b Fi(avail_in)f Fj(should)h(indicate)i(ho)m(w)f(man)m(y)g(b)m(ytes)h -(the)f(library)g(ma)m(y)g(read.)73 b Fi(BZ2_bzCompress)150 -3084 y Fj(up)s(dates)29 b Fi(next_in)p Fj(,)g Fi(avail_in)f -Fj(and)i Fi(total_in)e Fj(to)j(re\015ect)g(the)g(n)m(um)m(b)s(er)e(of)i -(b)m(ytes)g(it)g(has)f(read.)150 3241 y(Similarly)-8 -b(,)30 b Fi(next_out)d Fj(should)h(p)s(oin)m(t)h(to)h(a)f(bu\013er)f -(in)h(whic)m(h)g(the)g(compressed)g(data)g(is)g(to)h(b)s(e)e(placed,) -150 3350 y(with)k Fi(avail_out)f Fj(indicating)i(ho)m(w)g(m)m(uc)m(h)f -(output)h(space)g(is)f(a)m(v)-5 b(ailable.)49 b Fi(BZ2_bzCompress)29 -b Fj(up)s(dates)150 3460 y Fi(next_out)p Fj(,)f Fi(avail_out)g -Fj(and)i Fi(total_out)e Fj(to)j(re\015ect)g(the)g(n)m(um)m(b)s(er)e(of) -i(b)m(ytes)g(output.)150 3617 y(Y)-8 b(ou)39 b(ma)m(y)g(pro)m(vide)g -(and)f(remo)m(v)m(e)i(as)f(little)i(or)e(as)g(m)m(uc)m(h)f(data)h(as)g -(y)m(ou)g(lik)m(e)h(on)f(eac)m(h)g(call)h(of)f Fi(BZ2_)150 -3726 y(bzCompress)p Fj(.)46 b(In)32 b(the)i(limit,)h(it)e(is)g -(acceptable)i(to)f(supply)e(and)g(remo)m(v)m(e)j(data)e(one)g(b)m(yte)h -(at)g(a)f(time,)150 3836 y(although)27 b(this)g(w)m(ould)f(b)s(e)g -(terribly)i(ine\016cien)m(t.)40 b(Y)-8 b(ou)27 b(should)f(alw)m(a)m(ys) -i(ensure)e(that)h(at)g(least)h(one)f(b)m(yte)150 3946 -y(of)k(output)f(space)g(is)h(a)m(v)-5 b(ailable)32 b(at)f(eac)m(h)g -(call.)150 4102 y(A)36 b(second)g(purp)s(ose)f(of)i Fi(BZ2_bzCompress) -32 b Fj(is)37 b(to)g(request)g(a)f(c)m(hange)h(of)g(mo)s(de)f(of)g(the) -h(compressed)150 4212 y(stream.)150 4369 y(Conceptually)-8 -b(,)24 b(a)d(compressed)g(stream)g(can)g(b)s(e)f(in)h(one)g(of)g(four)f -(states:)38 b(IDLE,)20 b(R)m(UNNING,)i(FLUSH-)150 4478 -y(ING)34 b(and)d(FINISHING.)k(Before)e(initialisation)i(\()p -Fi(BZ2_bzCompressInit)p Fj(\))28 b(and)k(after)i(termination)150 -4588 
y(\()p Fi(BZ2_bzCompressEnd)p Fj(\),)27 b(a)j(stream)h(is)f -(regarded)h(as)f(IDLE.)150 4745 y(Up)s(on)j(initialisation)i(\()p -Fi(BZ2_bzCompressInit)p Fj(\),)c(the)j(stream)g(is)g(placed)g(in)f(the) -h(R)m(UNNING)h(state.)150 4854 y(Subsequen)m(t)k(calls)h(to)h -Fi(BZ2_bzCompress)36 b Fj(should)i(pass)i Fi(BZ_RUN)e -Fj(as)h(the)h(requested)g(action;)46 b(other)150 4964 -y(actions)31 b(are)g(illegal)i(and)c(will)i(result)g(in)f -Fi(BZ_SEQUENCE_ERROR)p Fj(.)150 5121 y(A)m(t)37 b(some)f(p)s(oin)m(t,)i -(the)e(calling)h(program)f(will)h(ha)m(v)m(e)g(pro)m(vided)e(all)i(the) -f(input)g(data)g(it)g(w)m(an)m(ts)h(to.)58 b(It)150 5230 -y(will)30 b(then)f(w)m(an)m(t)h(to)g(\014nish)e(up)g({)i(in)f -(e\013ect,)j(asking)d(the)h(library)f(to)h(pro)s(cess)g(an)m(y)f(data)h -(it)g(migh)m(t)g(ha)m(v)m(e)150 5340 y(bu\013ered)23 -b(in)m(ternally)-8 b(.)40 b(In)24 b(this)g(state,)j Fi(BZ2_bzCompress) -20 b Fj(will)k(no)g(longer)h(attempt)g(to)g(read)f(data)g(from)p -eop -%%Page: 16 17 -16 16 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(16)150 299 y Fi(next_in)p -Fj(,)32 b(but)g(it)i(will)f(w)m(an)m(t)h(to)g(write)g(data)f(to)h -Fi(next_out)p Fj(.)46 b(Because)34 b(the)f(output)g(bu\013er)f -(supplied)150 408 y(b)m(y)d(the)g(user)g(can)g(b)s(e)f(arbitrarily)i -(small,)g(the)f(\014nishing-up)f(op)s(eration)h(cannot)h(necessarily)g -(b)s(e)e(done)150 518 y(with)i(a)h(single)g(call)g(of)g -Fi(BZ2_bzCompress)p Fj(.)150 675 y(Instead,)47 b(the)d(calling)g -(program)g(passes)f Fi(BZ_FINISH)e Fj(as)i(an)g(action)i(to)f -Fi(BZ2_bzCompress)p Fj(.)76 b(This)150 784 y(c)m(hanges)26 -b(the)g(stream's)g(state)h(to)f(FINISHING.)h(An)m(y)e(remaining)h -(input)e(\(ie,)k Fi(next_in[0)g(..)i(avail_)150 894 y(in-1])p -Fj(\))k(is)h(compressed)g(and)f(transferred)h(to)h(the)f(output)g -(bu\013er.)54 b(T)-8 b(o)36 b(do)f(this,)i Fi(BZ2_bzCompress)150 -1004 y Fj(m)m(ust)h(b)s(e)f(called)i(rep)s(eatedly)g(un)m(til)f(all)g 
-(the)h(output)f(has)f(b)s(een)g(consumed.)63 b(A)m(t)38 -b(that)h(p)s(oin)m(t,)h Fi(BZ2_)150 1113 y(bzCompress)g -Fj(returns)i Fi(BZ_STREAM_END)p Fj(,)g(and)f(the)i(stream's)g(state)h -(is)e(set)h(bac)m(k)g(to)h(IDLE.)d Fi(BZ2_)150 1223 y(bzCompressEnd)27 -b Fj(should)i(then)h(b)s(e)g(called.)150 1380 y(Just)g(to)i(mak)m(e)f -(sure)f(the)h(calling)g(program)g(do)s(es)f(not)h(c)m(heat,)h(the)f -(library)f(mak)m(es)h(a)g(note)g(of)g Fi(avail_)150 1489 -y(in)i Fj(at)h(the)g(time)h(of)e(the)h(\014rst)g(call)g(to)h -Fi(BZ2_bzCompress)30 b Fj(whic)m(h)j(has)g Fi(BZ_FINISH)e -Fj(as)j(an)f(action)i(\(ie,)150 1599 y(at)g(the)g(time)h(the)e(program) -h(has)f(announced)g(its)h(in)m(ten)m(tion)h(to)g(not)f(supply)e(an)m(y) -h(more)h(input\).)53 b(By)150 1708 y(comparing)30 b(this)f(v)-5 -b(alue)29 b(with)h(that)f(of)h Fi(avail_in)d Fj(o)m(v)m(er)k(subsequen) -m(t)d(calls)j(to)f Fi(BZ2_bzCompress)p Fj(,)c(the)150 -1818 y(library)k(can)f(detect)j(an)m(y)e(attempts)h(to)f(slip)g(in)f -(more)h(data)g(to)h(compress.)41 b(An)m(y)29 b(calls)i(for)e(whic)m(h)h -(this)150 1928 y(is)h(detected)i(will)f(return)f Fi(BZ_SEQUENCE_ERROR)p -Fj(.)39 b(This)31 b(indicates)i(a)e(programming)g(mistak)m(e)i(whic)m -(h)150 2037 y(should)c(b)s(e)h(corrected.)150 2194 y(Instead)36 -b(of)g(asking)h(to)g(\014nish,)f(the)h(calling)g(program)f(ma)m(y)h -(ask)f Fi(BZ2_bzCompress)c Fj(to)37 b(tak)m(e)h(all)f(the)150 -2304 y(remaining)f(input,)g(compress)f(it)h(and)f(terminate)h(the)g -(curren)m(t)g(\(Burro)m(ws-Wheeler\))h(compression)150 -2413 y(blo)s(c)m(k.)j(This)25 b(could)h(b)s(e)g(useful)f(for)h(error)g -(con)m(trol)i(purp)s(oses.)38 b(The)26 b(mec)m(hanism)g(is)g(analogous) -h(to)f(that)150 2523 y(for)35 b(\014nishing:)48 b(call)35 -b Fi(BZ2_bzCompress)c Fj(with)k(an)f(action)i(of)f Fi(BZ_FLUSH)p -Fj(,)f(remo)m(v)m(e)i(output)f(data,)h(and)150 2632 y(p)s(ersist)i -(with)g(the)h Fi(BZ_FLUSH)d Fj(action)k(un)m(til)e(the)h(v)-5 -b(alue)39 b Fi(BZ_RUN)d Fj(is)j(returned.)64 b(As)38 
-b(with)g(\014nishing,)150 2742 y Fi(BZ2_bzCompress)21 -b Fj(detects)27 b(an)m(y)f(attempt)g(to)h(pro)m(vide)e(more)h(input)f -(data)g(once)i(the)e(\015ush)f(has)h(b)s(egun.)150 2899 -y(Once)31 b(the)g(\015ush)d(is)j(complete,)h(the)f(stream)f(returns)g -(to)i(the)e(normal)h(R)m(UNNING)h(state.)150 3056 y(This)f(all)g -(sounds)f(prett)m(y)i(complex,)g(but)f(isn't)g(really)-8 -b(.)44 b(Here's)32 b(a)f(table)h(whic)m(h)e(sho)m(ws)h(whic)m(h)g -(actions)150 3165 y(are)d(allo)m(w)m(able)i(in)e(eac)m(h)h(state,)h -(what)d(action)j(will)e(b)s(e)f(tak)m(en,)j(what)d(the)i(next)f(state)h -(is,)g(and)e(what)h(the)150 3275 y(non-error)g(return)f(v)-5 -b(alues)28 b(are.)40 b(Note)30 b(that)e(y)m(ou)g(can't)g(explicitly)i -(ask)d(what)h(state)h(the)f(stream)g(is)g(in,)150 3384 -y(but)h(nor)h(do)f(y)m(ou)h(need)g(to)g({)h(it)f(can)g(b)s(e)f -(inferred)g(from)h(the)g(v)-5 b(alues)30 b(returned)f(b)m(y)h -Fi(BZ2_bzCompress)p Fj(.)390 3535 y(IDLE/)p Fi(any)572 -3639 y Fj(Illegal.)63 b(IDLE)29 b(state)j(only)e(exists)h(after)g -Fi(BZ2_bzCompressEnd)26 b Fj(or)572 3743 y(b)s(efore)k -Fi(BZ2_bzCompressInit)p Fj(.)572 3847 y(Return)g(v)-5 -b(alue)31 b(=)f Fi(BZ_SEQUENCE_ERROR)390 4054 y Fj(R)m(UNNING/)p -Fi(BZ_RUN)572 4158 y Fj(Compress)g(from)g Fi(next_in)e -Fj(to)j Fi(next_out)d Fj(as)j(m)m(uc)m(h)f(as)h(p)s(ossible.)572 -4262 y(Next)g(state)h(=)e(R)m(UNNING)572 4366 y(Return)g(v)-5 -b(alue)31 b(=)f Fi(BZ_RUN_OK)390 4573 y Fj(R)m(UNNING/)p -Fi(BZ_FLUSH)572 4677 y Fj(Remem)m(b)s(er)g(curren)m(t)h(v)-5 -b(alue)31 b(of)f Fi(next_in)p Fj(.)59 b(Compress)30 b(from)g -Fi(next_in)572 4781 y Fj(to)h Fi(next_out)d Fj(as)j(m)m(uc)m(h)f(as)g -(p)s(ossible,)g(but)g(do)h(not)f(accept)i(an)m(y)e(more)h(input.)572 -4885 y(Next)g(state)h(=)e(FLUSHING)572 4988 y(Return)g(v)-5 -b(alue)31 b(=)f Fi(BZ_FLUSH_OK)390 5196 y Fj(R)m(UNNING/)p -Fi(BZ_FINISH)572 5300 y Fj(Remem)m(b)s(er)g(curren)m(t)h(v)-5 -b(alue)31 b(of)f Fi(next_in)p Fj(.)59 b(Compress)30 b(from)g -Fi(next_in)p eop -%%Page: 17 18 -17 17 
bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(17)572 299 y Fj(to)31 -b Fi(next_out)d Fj(as)j(m)m(uc)m(h)f(as)g(p)s(ossible,)g(but)g(do)h -(not)f(accept)i(an)m(y)e(more)h(input.)572 403 y(Next)g(state)h(=)e -(FINISHING)572 506 y(Return)g(v)-5 b(alue)31 b(=)f Fi(BZ_FINISH_OK)390 -714 y Fj(FLUSHING/)p Fi(BZ_FLUSH)572 818 y Fj(Compress)g(from)g -Fi(next_in)e Fj(to)j Fi(next_out)d Fj(as)j(m)m(uc)m(h)f(as)h(p)s -(ossible,)572 922 y(but)f(do)g(not)h(accept)g(an)m(y)g(more)g(input.) -572 1025 y(If)f(all)h(the)g(existing)g(input)f(has)g(b)s(een)f(used)h -(up)f(and)h(all)h(compressed)572 1129 y(output)f(has)g(b)s(een)g(remo)m -(v)m(ed)663 1233 y(Next)h(state)h(=)e(R)m(UNNING;)i(Return)e(v)-5 -b(alue)31 b(=)f Fi(BZ_RUN_OK)572 1337 y Fj(else)663 1440 -y(Next)h(state)h(=)e(FLUSHING;)g(Return)h(v)-5 b(alue)30 -b(=)g Fi(BZ_FLUSH_OK)390 1648 y Fj(FLUSHING/other)572 -1752 y(Illegal.)572 1856 y(Return)g(v)-5 b(alue)31 b(=)f -Fi(BZ_SEQUENCE_ERROR)390 2063 y Fj(FINISHING/)p Fi(BZ_FINISH)572 -2167 y Fj(Compress)g(from)g Fi(next_in)e Fj(to)j Fi(next_out)d -Fj(as)j(m)m(uc)m(h)f(as)h(p)s(ossible,)572 2271 y(but)f(to)h(not)g -(accept)g(an)m(y)g(more)g(input.)572 2374 y(If)f(all)h(the)g(existing)g -(input)f(has)g(b)s(een)f(used)h(up)f(and)h(all)h(compressed)572 -2478 y(output)f(has)g(b)s(een)g(remo)m(v)m(ed)663 2582 -y(Next)h(state)h(=)e(IDLE;)f(Return)i(v)-5 b(alue)30 -b(=)g Fi(BZ_STREAM_END)572 2686 y Fj(else)663 2790 y(Next)h(state)h(=)e -(FINISHING;)h(Return)g(v)-5 b(alue)30 b(=)g Fi(BZ_FINISHING)390 -2997 y Fj(FINISHING/other)572 3101 y(Illegal.)572 3205 -y(Return)g(v)-5 b(alue)31 b(=)f Fi(BZ_SEQUENCE_ERROR)150 -3361 y Fj(That)23 b(still)h(lo)s(oks)g(complicated?)39 -b(W)-8 b(ell,)26 b(fair)d(enough.)39 b(The)23 b(usual)f(sequence)h(of)h -(calls)g(for)f(compressing)150 3471 y(a)30 b(load)h(of)g(data)f(is:)225 -3628 y Fh(\017)60 b Fj(Get)32 b(started)f(with)f Fi(BZ2_bzCompressInit) -p Fj(.)225 3774 y Fh(\017)60 b Fj(Sho)m(v)m(el)37 
b(data)g(in)f(and)g -(shlurp)f(out)i(its)f(compressed)h(form)f(using)g(zero)i(or)e(more)h -(calls)h(of)e Fi(BZ2_)330 3884 y(bzCompress)28 b Fj(with)i(action)h(=)f -Fi(BZ_RUN)p Fj(.)225 4030 y Fh(\017)60 b Fj(Finish)24 -b(up.)38 b(Rep)s(eatedly)25 b(call)h Fi(BZ2_bzCompress)21 -b Fj(with)k(action)h(=)e Fi(BZ_FINISH)p Fj(,)g(cop)m(ying)h(out)h(the) -330 4139 y(compressed)k(output,)h(un)m(til)g Fi(BZ_STREAM_END)26 -b Fj(is)31 b(returned.)225 4285 y Fh(\017)60 b Fj(Close)31 -b(up)e(and)h(go)h(home.)41 b(Call)30 b Fi(BZ2_bzCompressEnd)p -Fj(.)150 4478 y(If)23 b(the)g(data)g(y)m(ou)h(w)m(an)m(t)g(to)f -(compress)g(\014ts)g(in)m(to)h(y)m(our)g(input)e(bu\013er)g(all)i(at)f -(once,)j(y)m(ou)d(can)g(skip)g(the)g(calls)150 4588 y(of)37 -b Fi(BZ2_bzCompress)26 b(\()k(...,)f(BZ_RUN)g(\))36 b -Fj(and)g(just)g(do)g(the)h Fi(BZ2_bzCompress)26 b(\()k(...,)f -(BZ_FINISH)150 4698 y(\))h Fj(calls.)150 4854 y(All)35 -b(required)g(memory)h(is)f(allo)s(cated)i(b)m(y)e Fi -(BZ2_bzCompressInit)p Fj(.)51 b(The)35 b(compression)h(library)f(can) -150 4964 y(accept)f(an)m(y)e(data)h(at)g(all)h(\(ob)m(viously\).)48 -b(So)32 b(y)m(ou)h(shouldn't)f(get)i(an)m(y)f(error)g(return)f(v)-5 -b(alues)32 b(from)h(the)150 5074 y Fi(BZ2_bzCompress)28 -b Fj(calls.)48 b(If)32 b(y)m(ou)g(do,)h(they)g(will)g(b)s(e)e -Fi(BZ_SEQUENCE_ERROR)p Fj(,)e(and)i(indicate)i(a)g(bug)e(in)150 -5183 y(y)m(our)g(programming.)150 5340 y(T)-8 b(rivial)32 -b(other)f(p)s(ossible)f(return)g(v)-5 b(alues:)p eop -%%Page: 18 19 -18 18 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(18)572 299 y Fi(BZ_PARAM_ERROR)663 -403 y Fj(if)30 b Fi(strm)f Fj(is)i Fi(NULL)p Fj(,)e(or)i -Fi(strm->s)d Fj(is)j Fi(NULL)150 652 y Fe(3.3.3)63 b -Fd(BZ2_bzCompressEnd)390 839 y Fi(int)47 b(BZ2_bzCompressEnd)c(\()k -(bz_stream)f(*strm)g(\);)150 996 y Fj(Releases)32 b(all)f(memory)f -(asso)s(ciated)i(with)e(a)h(compression)f(stream.)150 -1153 y(P)m(ossible)i(return)e(v)-5 b(alues:)481 1304 -y Fi(BZ_PARAM_ERROR)117 b 
Fj(if)31 b Fi(strm)e Fj(is)h -Fi(NULL)f Fj(or)i Fi(strm->s)d Fj(is)j Fi(NULL)481 1408 -y(BZ_OK)120 b Fj(otherwise)150 1657 y Fe(3.3.4)63 b Fd -(BZ2_bzDecompressInit)390 1844 y Fi(int)47 b(BZ2_bzDecompressInit)42 -b(\()48 b(bz_stream)d(*strm,)h(int)h(verbosity,)e(int)i(small)f(\);)150 -2001 y Fj(Prepares)39 b(for)f(decompression.)65 b(As)38 -b(with)h Fi(BZ2_bzCompressInit)p Fj(,)c(a)k Fi(bz_stream)d -Fj(record)j(should)150 2110 y(b)s(e)30 b(allo)s(cated)j(and)d -(initialised)i(b)s(efore)f(the)g(call.)44 b(Fields)31 -b Fi(bzalloc)p Fj(,)e Fi(bzfree)g Fj(and)h Fi(opaque)g -Fj(should)g(b)s(e)150 2220 y(set)35 b(if)f(a)g(custom)h(memory)f(allo)s -(cator)i(is)e(required,)h(or)g(made)f Fi(NULL)f Fj(for)h(the)g(normal)g -Fi(malloc)p Fj(/)p Fi(free)150 2330 y Fj(routines.)72 -b(Up)s(on)40 b(return,)j(the)e(in)m(ternal)h(state)g(will)g(ha)m(v)m(e) -f(b)s(een)f(initialised,)45 b(and)40 b Fi(total_in)f -Fj(and)150 2439 y Fi(total_out)28 b Fj(will)j(b)s(e)e(zero.)150 -2596 y(F)-8 b(or)31 b(the)g(meaning)f(of)h(parameter)g -Fi(verbosity)p Fj(,)d(see)j Fi(BZ2_bzCompressInit)p Fj(.)150 -2753 y(If)26 b Fi(small)f Fj(is)i(nonzero,)h(the)f(library)f(will)h -(use)f(an)g(alternativ)m(e)j(decompression)e(algorithm)g(whic)m(h)f -(uses)150 2862 y(less)42 b(memory)h(but)e(at)i(the)f(cost)i(of)e -(decompressing)g(more)h(slo)m(wly)g(\(roughly)f(sp)s(eaking,)j(half)d -(the)150 2972 y(sp)s(eed,)29 b(but)f(the)h(maxim)m(um)f(memory)i -(requiremen)m(t)f(drops)f(to)i(around)e(2300k\).)42 b(See)29 -b(Chapter)g(2)g(for)150 3082 y(more)i(information)g(on)f(memory)g -(managemen)m(t.)150 3238 y(Note)37 b(that)f(the)g(amoun)m(t)g(of)g -(memory)g(needed)f(to)h(decompress)g(a)g(stream)g(cannot)g(b)s(e)f -(determined)150 3348 y(un)m(til)41 b(the)f(stream's)h(header)f(has)g(b) -s(een)f(read,)k(so)e(ev)m(en)g(if)f Fi(BZ2_bzDecompressInit)35 -b Fj(succeeds,)43 b(a)150 3458 y(subsequen)m(t)30 b Fi -(BZ2_bzDecompress)c Fj(could)k(fail)h(with)f Fi(BZ_MEM_ERROR)p -Fj(.)150 3614 y(P)m(ossible)i(return)e(v)-5 
b(alues:)572 -3765 y Fi(BZ_CONFIG_ERROR)663 3869 y Fj(if)30 b(the)h(library)f(has)g -(b)s(een)g(mis-compiled)572 3973 y Fi(BZ_PARAM_ERROR)663 -4077 y Fj(if)g Fi(\(small)46 b(!=)h(0)h(&&)f(small)f(!=)h(1\))663 -4181 y Fj(or)30 b Fi(\(verbosity)45 b(<)j(0)f(||)g(verbosity)f(>)h(4\)) -572 4284 y(BZ_MEM_ERROR)663 4388 y Fj(if)30 b(insu\016cien)m(t)h -(memory)f(is)h(a)m(v)-5 b(ailable)150 4545 y(Allo)m(w)m(able)32 -b(next)f(actions:)572 4696 y Fi(BZ2_bzDecompress)663 -4800 y Fj(if)f Fi(BZ_OK)f Fj(w)m(as)h(returned)572 4904 -y(no)g(sp)s(eci\014c)g(action)i(required)e(in)g(case)h(of)g(error)150 -5153 y Fe(3.3.5)63 b Fd(BZ2_bzDecompress)390 5340 y Fi(int)47 -b(BZ2_bzDecompress)c(\()48 b(bz_stream)d(*strm)h(\);)p -eop -%%Page: 19 20 -19 19 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(19)150 299 y Fj(Pro)m(vides)41 -b(more)g(input)f(and/out)g(output)h(bu\013er)e(space)i(for)f(the)h -(library)-8 b(.)71 b(The)41 b(caller)h(main)m(tains)150 -408 y(input)30 b(and)f(output)h(bu\013ers,)g(and)g(uses)g -Fi(BZ2_bzDecompress)25 b Fj(to)32 b(transfer)e(data)h(b)s(et)m(w)m(een) -g(them.)150 565 y(Before)45 b(eac)m(h)h(call)f(to)h Fi -(BZ2_bzDecompress)p Fj(,)e Fi(next_in)e Fj(should)i(p)s(oin)m(t)h(at)g -(the)g(compressed)f(data,)150 675 y(and)33 b Fi(avail_in)e -Fj(should)i(indicate)i(ho)m(w)e(man)m(y)h(b)m(ytes)g(the)g(library)g -(ma)m(y)g(read.)50 b Fi(BZ2_bzDecompress)150 784 y Fj(up)s(dates)29 -b Fi(next_in)p Fj(,)g Fi(avail_in)f Fj(and)i Fi(total_in)e -Fj(to)j(re\015ect)g(the)g(n)m(um)m(b)s(er)e(of)i(b)m(ytes)g(it)g(has)f -(read.)150 941 y(Similarly)-8 b(,)40 b Fi(next_out)34 -b Fj(should)i(p)s(oin)m(t)h(to)h(a)f(bu\013er)f(in)h(whic)m(h)g(the)g -(uncompressed)f(output)h(is)g(to)h(b)s(e)150 1051 y(placed,)f(with)e -Fi(avail_out)e Fj(indicating)j(ho)m(w)f(m)m(uc)m(h)h(output)f(space)g -(is)h(a)m(v)-5 b(ailable.)57 b Fi(BZ2_bzCompress)150 -1160 y Fj(up)s(dates)29 b Fi(next_out)p Fj(,)g Fi(avail_out)f -Fj(and)h Fi(total_out)f 
Fj(to)j(re\015ect)h(the)e(n)m(um)m(b)s(er)g(of) -g(b)m(ytes)h(output.)150 1317 y(Y)-8 b(ou)39 b(ma)m(y)g(pro)m(vide)g -(and)f(remo)m(v)m(e)i(as)f(little)i(or)e(as)g(m)m(uc)m(h)f(data)h(as)g -(y)m(ou)g(lik)m(e)h(on)f(eac)m(h)g(call)h(of)f Fi(BZ2_)150 -1427 y(bzDecompress)p Fj(.)d(In)27 b(the)h(limit,)h(it)f(is)f -(acceptable)i(to)f(supply)e(and)g(remo)m(v)m(e)j(data)f(one)g(b)m(yte)g -(at)f(a)h(time,)150 1537 y(although)f(this)g(w)m(ould)f(b)s(e)g -(terribly)i(ine\016cien)m(t.)40 b(Y)-8 b(ou)27 b(should)f(alw)m(a)m(ys) -i(ensure)e(that)h(at)g(least)h(one)f(b)m(yte)150 1646 -y(of)k(output)f(space)g(is)h(a)m(v)-5 b(ailable)32 b(at)f(eac)m(h)g -(call.)150 1803 y(Use)g(of)f Fi(BZ2_bzDecompress)c Fj(is)k(simpler)h -(than)f Fi(BZ2_bzCompress)p Fj(.)150 1960 y(Y)-8 b(ou)28 -b(should)f(pro)m(vide)i(input)f(and)f(remo)m(v)m(e)j(output)e(as)g -(describ)s(ed)g(ab)s(o)m(v)m(e,)i(and)d(rep)s(eatedly)i(call)g -Fi(BZ2_)150 2069 y(bzDecompress)k Fj(un)m(til)k Fi(BZ_STREAM_END)c -Fj(is)k(returned.)59 b(App)s(earance)36 b(of)g Fi(BZ_STREAM_END)d -Fj(denotes)150 2179 y(that)45 b Fi(BZ2_bzDecompress)40 -b Fj(has)45 b(detected)h(the)f(logical)i(end)d(of)g(the)h(compressed)g -(stream.)84 b Fi(BZ2_)150 2289 y(bzDecompress)27 b Fj(will)k(not)g(pro) -s(duce)e Fi(BZ_STREAM_END)e Fj(un)m(til)k(all)g(output)g(data)f(has)g -(b)s(een)g(placed)h(in)m(to)150 2398 y(the)k(output)f(bu\013er,)i(so)e -(once)i Fi(BZ_STREAM_END)31 b Fj(app)s(ears,)k(y)m(ou)g(are)g(guaran)m -(teed)g(to)h(ha)m(v)m(e)f(a)m(v)-5 b(ailable)150 2508 -y(all)31 b(the)g(decompressed)f(output,)h(and)e Fi(BZ2_bzDecompressEnd) -c Fj(can)31 b(safely)f(b)s(e)g(called.)150 2665 y(If)38 -b(case)i(of)f(an)f(error)h(return)f(v)-5 b(alue,)42 b(y)m(ou)d(should)e -(call)j Fi(BZ2_bzDecompressEnd)33 b Fj(to)40 b(clean)f(up)f(and)150 -2774 y(release)32 b(memory)-8 b(.)150 2931 y(P)m(ossible)32 -b(return)e(v)-5 b(alues:)572 3082 y Fi(BZ_PARAM_ERROR)663 -3186 y Fj(if)30 b Fi(strm)f Fj(is)i Fi(NULL)e Fj(or)i -Fi(strm->s)d Fj(is)i Fi(NULL)663 3290 
y Fj(or)g Fi(strm->avail_out)44 -b(<)j(1)572 3393 y(BZ_DATA_ERROR)663 3497 y Fj(if)30 -b(a)h(data)f(in)m(tegrit)m(y)j(error)e(is)f(detected)i(in)e(the)h -(compressed)g(stream)572 3601 y Fi(BZ_DATA_ERROR_MAGIC)663 -3705 y Fj(if)f(the)h(compressed)f(stream)h(do)s(esn't)f(b)s(egin)g -(with)h(the)f(righ)m(t)i(magic)f(b)m(ytes)572 3808 y -Fi(BZ_MEM_ERROR)663 3912 y Fj(if)f(there)h(w)m(asn't)g(enough)f(memory) -h(a)m(v)-5 b(ailable)572 4016 y Fi(BZ_STREAM_END)663 -4120 y Fj(if)30 b(the)h(logical)h(end)e(of)h(the)f(data)h(stream)g(w)m -(as)f(detected)i(and)e(all)663 4224 y(output)g(in)g(has)g(b)s(een)g -(consumed,)g(eg)h Fi(s->avail_out)44 b(>)k(0)572 4327 -y(BZ_OK)663 4431 y Fj(otherwise)150 4588 y(Allo)m(w)m(able)32 -b(next)f(actions:)572 4739 y Fi(BZ2_bzDecompress)663 -4843 y Fj(if)f Fi(BZ_OK)f Fj(w)m(as)h(returned)572 4946 -y Fi(BZ2_bzDecompressEnd)663 5050 y Fj(otherwise)p eop -%%Page: 20 21 -20 20 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(20)150 299 y Fe(3.3.6)63 -b Fd(BZ2_bzDecompressEnd)390 486 y Fi(int)47 b(BZ2_bzDecompressEnd)42 -b(\()48 b(bz_stream)d(*strm)i(\);)150 643 y Fj(Releases)32 -b(all)f(memory)f(asso)s(ciated)i(with)e(a)h(decompression)f(stream.)150 -799 y(P)m(ossible)i(return)e(v)-5 b(alues:)572 950 y -Fi(BZ_PARAM_ERROR)663 1054 y Fj(if)30 b Fi(strm)f Fj(is)i -Fi(NULL)e Fj(or)i Fi(strm->s)d Fj(is)i Fi(NULL)572 1158 -y(BZ_OK)663 1262 y Fj(otherwise)150 1419 y(Allo)m(w)m(able)i(next)f -(actions:)572 1570 y(None.)150 1857 y Fk(3.4)68 b(High-lev)l(el)47 -b(in)l(terface)150 2050 y Fj(This)35 b(in)m(terface)i(pro)m(vides)e -(functions)g(for)g(reading)g(and)f(writing)i Fi(bzip2)d -Fj(format)j(\014les.)55 b(First,)37 b(some)150 2159 y(general)31 -b(p)s(oin)m(ts.)225 2316 y Fh(\017)60 b Fj(All)35 b(of)g(the)h -(functions)e(tak)m(e)j(an)d Fi(int*)g Fj(\014rst)h(argumen)m(t,)i -Fi(bzerror)p Fj(.)52 b(After)36 b(eac)m(h)f(call,)j Fi(bzerror)330 -2426 y Fj(should)23 b(b)s(e)f(consulted)i(\014rst)f(to)i(determine)f 
-(the)g(outcome)h(of)e(the)h(call.)40 b(If)23 b Fi(bzerror)e -Fj(is)j Fi(BZ_OK)p Fj(,)g(the)330 2535 y(call)35 b(completed)g -(successfully)-8 b(,)36 b(and)d(only)i(then)e(should)h(the)g(return)g -(v)-5 b(alue)34 b(of)g(the)h(function)f(\(if)330 2645 -y(an)m(y\))27 b(b)s(e)e(consulted.)40 b(If)26 b Fi(bzerror)f -Fj(is)h Fi(BZ_IO_ERROR)p Fj(,)e(there)j(w)m(as)g(an)e(error)i -(reading/writing)h(the)330 2754 y(underlying)k(compressed)h(\014le,)h -(and)e(y)m(ou)h(should)f(then)h(consult)g Fi(errno)p -Fj(/)p Fi(perror)e Fj(to)i(determine)330 2864 y(the)k(cause)f(of)h(the) -f(di\016cult)m(y)-8 b(.)60 b Fi(bzerror)34 b Fj(ma)m(y)j(also)g(b)s(e)e -(set)i(to)g(v)-5 b(arious)37 b(other)g(v)-5 b(alues;)39 -b(precise)330 2974 y(details)31 b(are)g(giv)m(en)g(on)g(a)f(p)s -(er-function)g(basis)g(b)s(elo)m(w.)225 3111 y Fh(\017)60 -b Fj(If)39 b Fi(bzerror)e Fj(indicates)i(an)g(error)g(\(ie,)k(an)m -(ything)c(except)h Fi(BZ_OK)d Fj(and)i Fi(BZ_STREAM_END)p -Fj(\),)f(y)m(ou)330 3220 y(should)56 b(immediately)i(call)g -Fi(BZ2_bzReadClose)53 b Fj(\(or)58 b Fi(BZ2_bzWriteClose)p -Fj(,)h(dep)s(ending)d(on)330 3330 y(whether)37 b(y)m(ou)h(are)f -(attempting)i(to)f(read)f(or)h(to)g(write\))g(to)g(free)g(up)e(all)i -(resources)g(asso)s(ciated)330 3439 y(with)44 b(the)g(stream.)83 -b(Once)44 b(an)g(error)h(has)e(b)s(een)h(indicated,)k(b)s(eha)m(viour)c -(of)g(all)h(calls)g(except)330 3549 y Fi(BZ2_bzReadClose)h -Fj(\()p Fi(BZ2_bzWriteClose)p Fj(\))h(is)j(unde\014ned.)99 -b(The)51 b(implication)g(is)g(that)g(\(1\))330 3659 y -Fi(bzerror)42 b Fj(should)i(b)s(e)f(c)m(hec)m(k)m(ed)k(after)e(eac)m(h) -g(call,)k(and)43 b(\(2\))j(if)e Fi(bzerror)e Fj(indicates)j(an)f -(error,)330 3768 y Fi(BZ2_bzReadClose)26 b Fj(\()p Fi(BZ2_bzWriteClose) -p Fj(\))h(should)i(then)h(b)s(e)g(called)i(to)f(clean)g(up.)225 -3905 y Fh(\017)60 b Fj(The)32 b Fi(FILE*)f Fj(argumen)m(ts)i(passed)f -(to)h Fi(BZ2_bzReadOpen)p Fj(/)p Fi(BZ2_bzWriteO)o(pen)26 -b Fj(should)31 b(b)s(e)h(set)h(to)330 4015 y(binary)22 
-b(mo)s(de.)38 b(Most)24 b(Unix)f(systems)g(will)g(do)g(this)g(b)m(y)g -(default,)i(but)d(other)i(platforms,)g(including)330 -4124 y(Windo)m(ws)41 b(and)g(Mac,)46 b(will)c(not.)76 -b(If)41 b(y)m(ou)i(omit)f(this,)j(y)m(ou)e(ma)m(y)f(encoun)m(ter)h -(problems)e(when)330 4234 y(mo)m(ving)31 b(co)s(de)g(to)g(new)f -(platforms.)225 4371 y Fh(\017)60 b Fj(Memory)45 b(allo)s(cation)h -(requests)f(are)g(handled)e(b)m(y)h Fi(malloc)p Fj(/)p -Fi(free)p Fj(.)80 b(A)m(t)45 b(presen)m(t)g(there)g(is)f(no)330 -4481 y(facilit)m(y)39 b(for)f(user-de\014ned)e(memory)h(allo)s(cators)j -(in)d(the)g(\014le)h(I/O)g(functions)f(\(could)g(easily)i(b)s(e)330 -4590 y(added,)30 b(though\).)150 4842 y Fe(3.4.1)63 b -Fd(BZ2_bzReadOpen)533 5029 y Fi(typedef)46 b(void)h(BZFILE;)533 -5236 y(BZFILE)f(*BZ2_bzReadOpen)e(\()j(int)g(*bzerror,)f(FILE)g(*f,) -1726 5340 y(int)h(small,)f(int)h(verbosity,)p eop -%%Page: 21 22 -21 21 bop 150 -116 a Fl(Chapter)30 b(3:)h(Programming)e(with)g -Fi(libbzip2)1891 b Fl(21)1726 299 y Fi(void)47 b(*unused,)f(int)g -(nUnused)g(\);)150 456 y Fj(Prepare)27 b(to)g(read)f(compressed)h(data) -g(from)f(\014le)g(handle)g Fi(f)p Fj(.)39 b Fi(f)26 b -Fj(should)g(refer)g(to)i(a)e(\014le)h(whic)m(h)f(has)g(b)s(een)150 -565 y(op)s(ened)i(for)g(reading,)i(and)d(for)i(whic)m(h)f(the)h(error)g -(indicator)h(\()p Fi(ferror\(f\))p Fj(\)is)c(not)j(set.)41 -b(If)29 b Fi(small)e Fj(is)h(1,)150 675 y(the)j(library)f(will)h(try)f -(to)i(decompress)e(using)g(less)h(memory)-8 b(,)31 b(at)g(the)f(exp)s -(ense)g(of)h(sp)s(eed.)150 832 y(F)-8 b(or)37 b(reasons)f(explained)h -(b)s(elo)m(w,)h Fi(BZ2_bzRead)33 b Fj(will)k(decompress)f(the)h -Fi(nUnused)d Fj(b)m(ytes)j(starting)g(at)150 941 y Fi(unused)p -Fj(,)j(b)s(efore)f(starting)h(to)g(read)f(from)g(the)h(\014le)f -Fi(f)p Fj(.)67 b(A)m(t)40 b(most)g Fi(BZ_MAX_UNUSED)35 -b Fj(b)m(ytes)40 b(ma)m(y)g(b)s(e)150 1051 y(supplied)34 -b(lik)m(e)i(this.)54 b(If)34 b(this)h(facilit)m(y)i(is)e(not)g -(required,)h(y)m(ou)f(should)f(pass)g Fi(NULL)f 
Fj(and)h -Fi(0)h Fj(for)g Fi(unused)150 1160 y Fj(and)30 b(n)p -Fi(Unused)e Fj(resp)s(ectiv)m(ely)-8 b(.)150 1317 y(F)g(or)31 -b(the)g(meaning)f(of)h(parameters)g Fi(small)e Fj(and)g -Fi(verbosity)p Fj(,)f(see)j Fi(BZ2_bzDecompressInit)p -Fj(.)150 1474 y(The)i(amoun)m(t)g(of)f(memory)h(needed)g(to)g -(decompress)g(a)g(\014le)g(cannot)g(b)s(e)f(determined)h(un)m(til)g -(the)g(\014le's)150 1584 y(header)h(has)f(b)s(een)g(read.)51 -b(So)33 b(it)i(is)e(p)s(ossible)h(that)g Fi(BZ2_bzReadOpen)c -Fj(returns)j Fi(BZ_OK)g Fj(but)g(a)h(subse-)150 1693 -y(quen)m(t)d(call)g(of)g Fi(BZ2_bzRead)c Fj(will)k(return)f -Fi(BZ_MEM_ERROR)p Fj(.)150 1850 y(P)m(ossible)i(assignmen)m(ts)e(to)i -Fi(bzerror)p Fj(:)572 2001 y Fi(BZ_CONFIG_ERROR)663 2105 -y Fj(if)e(the)h(library)f(has)g(b)s(een)g(mis-compiled)572 -2209 y Fi(BZ_PARAM_ERROR)663 2313 y Fj(if)g Fi(f)g Fj(is)h -Fi(NULL)663 2416 y Fj(or)f Fi(small)f Fj(is)i(neither)g -Fi(0)f Fj(nor)g Fi(1)663 2520 y Fj(or)g Fi(\(unused)46 -b(==)h(NULL)g(&&)g(nUnused)f(!=)h(0\))663 2624 y Fj(or)30 -b Fi(\(unused)46 b(!=)h(NULL)g(&&)g(!\(0)g(<=)g(nUnused)f(<=)h -(BZ_MAX_UNUSED\)\))572 2728 y(BZ_IO_ERROR)663 2831 y -Fj(if)30 b Fi(ferror\(f\))e Fj(is)i(nonzero)572 2935 -y Fi(BZ_MEM_ERROR)663 3039 y Fj(if)g(insu\016cien)m(t)h(memory)f(is)h -(a)m(v)-5 b(ailable)572 3143 y Fi(BZ_OK)663 3247 y Fj(otherwise.)150 -3403 y(P)m(ossible)32 b(return)e(v)-5 b(alues:)572 3554 -y(P)m(oin)m(ter)32 b(to)f(an)f(abstract)i Fi(BZFILE)663 -3658 y Fj(if)e Fi(bzerror)e Fj(is)j Fi(BZ_OK)572 3762 -y(NULL)663 3866 y Fj(otherwise)150 4023 y(Allo)m(w)m(able)h(next)f -(actions:)572 4174 y Fi(BZ2_bzRead)663 4277 y Fj(if)f -Fi(bzerror)e Fj(is)j Fi(BZ_OK)572 4381 y(BZ2_bzClose)663 -4485 y Fj(otherwise)150 4887 y Fe(3.4.2)63 b Fd(BZ2_bzRead)533 -5074 y Fi(int)47 b(BZ2_bzRead)e(\()j(int)e(*bzerror,)g(BZFILE)g(*b,)h -(void)f(*buf,)h(int)g(len)g(\);)150 5230 y Fj(Reads)33 -b(up)e(to)j Fi(len)d Fj(\(uncompressed\))i(b)m(ytes)g(from)f(the)h -(compressed)g(\014le)g Fi(b)f 
Fj(in)m(to)i(the)f(bu\013er)f -Fi(buf)p Fj(.)46 b(If)150 5340 y(the)28 b(read)f(w)m(as)h(successful,)g -Fi(bzerror)d Fj(is)j(set)g(to)g Fi(BZ_OK)f Fj(and)f(the)i(n)m(um)m(b)s -(er)e(of)i(b)m(ytes)g(read)g(is)f(returned.)p eop -%%Page: 22 23 -22 22 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(22)150 299 y Fj(If)32 -b(the)g(logical)i(end-of-stream)e(w)m(as)g(detected,)i -Fi(bzerror)c Fj(will)i(b)s(e)g(set)g(to)h Fi(BZ_STREAM_END)p -Fj(,)c(and)i(the)150 408 y(n)m(um)m(b)s(er)e(of)i(b)m(ytes)g(read)f(is) -h(returned.)40 b(All)30 b(other)h Fi(bzerror)e Fj(v)-5 -b(alues)30 b(denote)h(an)f(error.)150 565 y Fi(BZ2_bzRead)k -Fj(will)k(supply)e Fi(len)g Fj(b)m(ytes,)k(unless)d(the)g(logical)j -(stream)d(end)g(is)g(detected)i(or)f(an)e(error)150 675 -y(o)s(ccurs.)70 b(Because)40 b(of)g(this,)j(it)e(is)f(p)s(ossible)f(to) -i(detect)h(the)e(stream)h(end)e(b)m(y)h(observing)h(when)e(the)150 -784 y(n)m(um)m(b)s(er)24 b(of)h(b)m(ytes)h(returned)e(is)h(less)h(than) -e(the)i(n)m(um)m(b)s(er)e(requested.)39 b(Nev)m(ertheless,)29 -b(this)c(is)g(regarded)150 894 y(as)37 b(inadvisable;)i(y)m(ou)e -(should)f(instead)h(c)m(hec)m(k)h Fi(bzerror)c Fj(after)k(ev)m(ery)f -(call)h(and)e(w)m(atc)m(h)h(out)g(for)g Fi(BZ_)150 1004 -y(STREAM_END)p Fj(.)150 1160 y(In)m(ternally)-8 b(,)47 -b Fi(BZ2_bzRead)39 b Fj(copies)k(data)f(from)g(the)g(compressed)g -(\014le)g(in)g(c)m(h)m(unks)g(of)g(size)h Fi(BZ_MAX_)150 -1270 y(UNUSED)28 b Fj(b)m(ytes)j(b)s(efore)e(decompressing)h(it.)41 -b(If)30 b(the)g(\014le)g(con)m(tains)h(more)f(b)m(ytes)h(than)e -(strictly)i(needed)150 1380 y(to)46 b(reac)m(h)g(the)f(logical)i -(end-of-stream,)j Fi(BZ2_bzRead)42 b Fj(will)j(almost)h(certainly)g -(read)f(some)g(of)h(the)150 1489 y(trailing)f(data)g(b)s(efore)f -(signalling)h Fi(BZ_SEQUENCE_END)p Fj(.)78 b(T)-8 b(o)45 -b(collect)i(the)d(read)g(but)g(un)m(used)f(data)150 1599 -y(once)27 b Fi(BZ_SEQUENCE_END)22 b Fj(has)k(app)s(eared,)g(call)h -Fi(BZ2_bzReadGetUnused)21 b 
Fj(immediately)28 b(b)s(efore)e -Fi(BZ2_)150 1708 y(bzReadClose)p Fj(.)150 1865 y(P)m(ossible)32 -b(assignmen)m(ts)e(to)i Fi(bzerror)p Fj(:)572 2016 y -Fi(BZ_PARAM_ERROR)663 2120 y Fj(if)e Fi(b)g Fj(is)h Fi(NULL)e -Fj(or)h Fi(buf)g Fj(is)g Fi(NULL)f Fj(or)i Fi(len)47 -b(<)g(0)572 2224 y(BZ_SEQUENCE_ERROR)663 2328 y Fj(if)30 -b Fi(b)g Fj(w)m(as)h(op)s(ened)e(with)h Fi(BZ2_bzWriteOpen)572 -2431 y(BZ_IO_ERROR)663 2535 y Fj(if)g(there)h(is)f(an)g(error)h -(reading)g(from)f(the)h(compressed)f(\014le)572 2639 -y Fi(BZ_UNEXPECTED_EOF)663 2743 y Fj(if)g(the)h(compressed)f(\014le)h -(ended)e(b)s(efore)i(the)f(logical)j(end-of-stream)e(w)m(as)g(detected) -572 2847 y Fi(BZ_DATA_ERROR)663 2950 y Fj(if)f(a)h(data)f(in)m(tegrit)m -(y)j(error)e(w)m(as)g(detected)g(in)g(the)f(compressed)h(stream)572 -3054 y Fi(BZ_DATA_ERROR_MAGIC)663 3158 y Fj(if)f(the)h(stream)g(do)s -(es)f(not)g(b)s(egin)g(with)h(the)f(requisite)i(header)e(b)m(ytes)h -(\(ie,)h(is)e(not)663 3262 y(a)g Fi(bzip2)f Fj(data)i(\014le\).)61 -b(This)30 b(is)h(really)g(a)f(sp)s(ecial)h(case)g(of)g -Fi(BZ_DATA_ERROR)p Fj(.)572 3365 y Fi(BZ_MEM_ERROR)663 -3469 y Fj(if)f(insu\016cien)m(t)h(memory)f(w)m(as)h(a)m(v)-5 -b(ailable)572 3573 y Fi(BZ_STREAM_END)663 3677 y Fj(if)30 -b(the)h(logical)h(end)e(of)h(stream)g(w)m(as)f(detected.)572 -3781 y Fi(BZ_OK)663 3884 y Fj(otherwise.)150 4041 y(P)m(ossible)i -(return)e(v)-5 b(alues:)572 4192 y(n)m(um)m(b)s(er)29 -b(of)i(b)m(ytes)g(read)663 4296 y(if)f Fi(bzerror)e Fj(is)j -Fi(BZ_OK)e Fj(or)h Fi(BZ_STREAM_END)572 4400 y Fj(unde\014ned)663 -4503 y(otherwise)150 4660 y(Allo)m(w)m(able)i(next)f(actions:)572 -4811 y(collect)i(data)d(from)h Fi(buf)p Fj(,)e(then)i -Fi(BZ2_bzRead)c Fj(or)k Fi(BZ2_bzReadClose)663 4915 y -Fj(if)f Fi(bzerror)e Fj(is)j Fi(BZ_OK)572 5019 y Fj(collect)i(data)d -(from)h Fi(buf)p Fj(,)e(then)i Fi(BZ2_bzReadClose)26 -b Fj(or)k Fi(BZ2_bzReadGetUnused)663 5123 y Fj(if)g Fi(bzerror)e -Fj(is)j Fi(BZ_SEQUENCE_END)572 5226 y(BZ2_bzReadClose)663 -5330 y Fj(otherwise)p eop 
-%%Page: 23 24 -23 23 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(23)150 299 y Fe(3.4.3)63 -b Fd(BZ2_bzReadGetUnused)533 486 y Fi(void)47 b(BZ2_bzReadGetUnused)42 -b(\()48 b(int*)e(bzerror,)g(BZFILE)g(*b,)1822 589 y(void**)g(unused,)g -(int*)g(nUnused)g(\);)150 746 y Fj(Returns)35 b(data)g(whic)m(h)f(w)m -(as)i(read)e(from)h(the)g(compressed)g(\014le)g(but)g(w)m(as)g(not)g -(needed)g(to)h(get)g(to)g(the)150 856 y(logical)44 b(end-of-stream.)76 -b Fi(*unused)39 b Fj(is)j(set)h(to)f(the)g(address)f(of)h(the)g(data,)k -(and)40 b Fi(*nUnused)g Fj(to)j(the)150 965 y(n)m(um)m(b)s(er)28 -b(of)h(b)m(ytes.)41 b Fi(*nUnused)27 b Fj(will)i(b)s(e)g(set)g(to)h(a)f -(v)-5 b(alue)30 b(b)s(et)m(w)m(een)g Fi(0)e Fj(and)g -Fi(BZ_MAX_UNUSED)e Fj(inclusiv)m(e.)150 1122 y(This)f(function)g(ma)m -(y)h(only)f(b)s(e)g(called)h(once)g Fi(BZ2_bzRead)d Fj(has)i(signalled) -h Fi(BZ_STREAM_END)21 b Fj(but)k(b)s(efore)150 1232 y -Fi(BZ2_bzReadClose)p Fj(.)150 1389 y(P)m(ossible)32 b(assignmen)m(ts)e -(to)i Fi(bzerror)p Fj(:)572 1540 y Fi(BZ_PARAM_ERROR)663 -1644 y Fj(if)e Fi(b)g Fj(is)h Fi(NULL)663 1747 y Fj(or)f -Fi(unused)f Fj(is)h Fi(NULL)g Fj(or)g Fi(nUnused)f Fj(is)h -Fi(NULL)572 1851 y(BZ_SEQUENCE_ERROR)663 1955 y Fj(if)g -Fi(BZ_STREAM_END)d Fj(has)j(not)g(b)s(een)g(signalled)663 -2059 y(or)g(if)h Fi(b)f Fj(w)m(as)g(op)s(ened)g(with)g -Fi(BZ2_bzWriteOpen)542 2162 y(BZ_OK)663 2266 y Fj(otherwise)150 -2423 y(Allo)m(w)m(able)i(next)f(actions:)572 2574 y Fi(BZ2_bzReadClose) -150 2882 y Fe(3.4.4)63 b Fd(BZ2_bzReadClose)533 3068 -y Fi(void)47 b(BZ2_bzReadClose)c(\()48 b(int)f(*bzerror,)e(BZFILE)h(*b) -h(\);)150 3225 y Fj(Releases)35 b(all)e(memory)h(p)s(ertaining)f(to)h -(the)f(compressed)h(\014le)f Fi(b)p Fj(.)49 b Fi(BZ2_bzReadClose)29 -b Fj(do)s(es)j(not)i(call)150 3335 y Fi(fclose)d Fj(on)h(the)g -(underlying)g(\014le)g(handle,)h(so)f(y)m(ou)h(should)e(do)h(that)h(y)m -(ourself)g(if)f(appropriate.)47 b Fi(BZ2_)150 3445 y(bzReadClose)27 -b 
Fj(should)j(b)s(e)f(called)j(to)f(clean)g(up)e(after)i(all)g(error)g -(situations.)150 3601 y(P)m(ossible)h(assignmen)m(ts)e(to)i -Fi(bzerror)p Fj(:)572 3752 y Fi(BZ_SEQUENCE_ERROR)663 -3856 y Fj(if)e Fi(b)g Fj(w)m(as)h(op)s(ened)e(with)h -Fi(BZ2_bzOpenWrite)572 3960 y(BZ_OK)663 4064 y Fj(otherwise)150 -4221 y(Allo)m(w)m(able)i(next)f(actions:)572 4372 y(none)150 -4679 y Fe(3.4.5)63 b Fd(BZ2_bzWriteOpen)533 4866 y Fi(BZFILE)46 -b(*BZ2_bzWriteOpen)e(\()j(int)g(*bzerror,)e(FILE)i(*f,)1774 -4970 y(int)g(blockSize100k,)d(int)j(verbosity,)1774 5074 -y(int)g(workFactor)e(\);)150 5230 y Fj(Prepare)31 b(to)h(write)g -(compressed)f(data)g(to)h(\014le)f(handle)f Fi(f)p Fj(.)42 -b Fi(f)30 b Fj(should)g(refer)i(to)f(a)h(\014le)f(whic)m(h)f(has)h(b)s -(een)150 5340 y(op)s(ened)f(for)g(writing,)h(and)f(for)g(whic)m(h)g -(the)h(error)g(indicator)g(\()p Fi(ferror\(f\))p Fj(\)is)e(not)h(set.)p -eop -%%Page: 24 25 -24 24 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(24)150 299 y Fj(F)-8 b(or)31 -b(the)g(meaning)f(of)h(parameters)g Fi(blockSize100k)p -Fj(,)c Fi(verbosity)h Fj(and)h Fi(workFactor)p Fj(,)f(see)150 -408 y Fi(BZ2_bzCompressInit)p Fj(.)150 565 y(All)38 b(required)g -(memory)g(is)g(allo)s(cated)i(at)e(this)h(stage,)i(so)d(if)g(the)h -(call)g(completes)g(successfully)-8 b(,)41 b Fi(BZ_)150 -675 y(MEM_ERROR)28 b Fj(cannot)j(b)s(e)e(signalled)i(b)m(y)g(a)f -(subsequen)m(t)g(call)h(to)h Fi(BZ2_bzWrite)p Fj(.)150 -832 y(P)m(ossible)g(assignmen)m(ts)e(to)i Fi(bzerror)p -Fj(:)572 983 y Fi(BZ_CONFIG_ERROR)663 1087 y Fj(if)e(the)h(library)f -(has)g(b)s(een)g(mis-compiled)572 1190 y Fi(BZ_PARAM_ERROR)663 -1294 y Fj(if)g Fi(f)g Fj(is)h Fi(NULL)663 1398 y Fj(or)f -Fi(blockSize100k)45 b(<)i(1)30 b Fj(or)h Fi(blockSize100k)44 -b(>)j(9)572 1502 y(BZ_IO_ERROR)663 1605 y Fj(if)30 b -Fi(ferror\(f\))e Fj(is)i(nonzero)572 1709 y Fi(BZ_MEM_ERROR)663 -1813 y Fj(if)g(insu\016cien)m(t)h(memory)f(is)h(a)m(v)-5 -b(ailable)572 1917 y Fi(BZ_OK)663 2021 y 
Fj(otherwise)150 -2177 y(P)m(ossible)32 b(return)e(v)-5 b(alues:)572 2328 -y(P)m(oin)m(ter)32 b(to)f(an)f(abstract)i Fi(BZFILE)663 -2432 y Fj(if)e Fi(bzerror)e Fj(is)j Fi(BZ_OK)572 2536 -y(NULL)663 2640 y Fj(otherwise)150 2797 y(Allo)m(w)m(able)h(next)f -(actions:)572 2948 y Fi(BZ2_bzWrite)663 3051 y Fj(if)f -Fi(bzerror)e Fj(is)j Fi(BZ_OK)597 3155 y Fj(\(y)m(ou)24 -b(could)f(go)h(directly)g(to)g Fi(BZ2_bzWriteClose)p -Fj(,)19 b(but)j(this)h(w)m(ould)g(b)s(e)g(prett)m(y)h(p)s(oin)m -(tless\))572 3259 y Fi(BZ2_bzWriteClose)663 3363 y Fj(otherwise)150 -3852 y Fe(3.4.6)63 b Fd(BZ2_bzWrite)533 4039 y Fi(void)47 -b(BZ2_bzWrite)e(\()i(int)g(*bzerror,)e(BZFILE)h(*b,)h(void)g(*buf,)f -(int)h(len)g(\);)150 4196 y Fj(Absorbs)35 b Fi(len)h -Fj(b)m(ytes)h(from)f(the)h(bu\013er)f Fi(buf)p Fj(,)h(ev)m(en)m(tually) -i(to)e(b)s(e)f(compressed)g(and)g(written)h(to)h(the)150 -4306 y(\014le.)150 4463 y(P)m(ossible)32 b(assignmen)m(ts)e(to)i -Fi(bzerror)p Fj(:)572 4614 y Fi(BZ_PARAM_ERROR)663 4717 -y Fj(if)e Fi(b)g Fj(is)h Fi(NULL)e Fj(or)h Fi(buf)g Fj(is)g -Fi(NULL)f Fj(or)i Fi(len)47 b(<)g(0)572 4821 y(BZ_SEQUENCE_ERROR)663 -4925 y Fj(if)30 b(b)g(w)m(as)g(op)s(ened)g(with)g Fi(BZ2_bzReadOpen)572 -5029 y(BZ_IO_ERROR)663 5132 y Fj(if)g(there)h(is)f(an)g(error)h -(writing)g(the)g(compressed)f(\014le.)572 5236 y Fi(BZ_OK)663 -5340 y Fj(otherwise)p eop -%%Page: 25 26 -25 25 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(25)150 299 y Fe(3.4.7)63 -b Fd(BZ2_bzWriteClose)533 486 y Fi(void)47 b(BZ2_bzWriteClose)c(\()48 -b(int)f(*bzerror,)e(BZFILE*)h(f,)1679 589 y(int)h(abandon,)1679 -693 y(unsigned)e(int*)i(nbytes_in,)1679 797 y(unsigned)e(int*)i -(nbytes_out)e(\);)533 1005 y(void)i(BZ2_bzWriteClose64)c(\()k(int)g -(*bzerror,)e(BZFILE*)h(f,)1774 1108 y(int)h(abandon,)1774 -1212 y(unsigned)f(int*)g(nbytes_in_lo32,)1774 1316 y(unsigned)g(int*)g -(nbytes_in_hi32,)1774 1420 y(unsigned)g(int*)g(nbytes_out_lo32,)1774 -1523 
y(unsigned)g(int*)g(nbytes_out_hi32)e(\);)150 1680 -y Fj(Compresses)38 b(and)f(\015ushes)g(to)i(the)g(compressed)f(\014le)h -(all)f(data)h(so)g(far)f(supplied)f(b)m(y)h Fi(BZ2_bzWrite)p -Fj(.)150 1790 y(The)h(logical)i(end-of-stream)e(mark)m(ers)h(are)f -(also)g(written,)j(so)d(subsequen)m(t)g(calls)g(to)h -Fi(BZ2_bzWrite)150 1899 y Fj(are)31 b(illegal.)45 b(All)31 -b(memory)g(asso)s(ciated)h(with)f(the)g(compressed)g(\014le)h -Fi(b)e Fj(is)h(released.)44 b Fi(fflush)29 b Fj(is)i(called)150 -2009 y(on)f(the)h(compressed)f(\014le,)h(but)f(it)h(is)f(not)h -Fi(fclose)p Fj('d.)150 2166 y(If)g Fi(BZ2_bzWriteClose)d -Fj(is)j(called)i(to)f(clean)g(up)f(after)h(an)f(error,)i(the)f(only)g -(action)h(is)e(to)i(release)g(the)150 2275 y(memory)-8 -b(.)40 b(The)28 b(library)g(records)g(the)g(error)g(co)s(des)g(issued)f -(b)m(y)g(previous)h(calls,)h(so)f(this)g(situation)h(will)150 -2385 y(b)s(e)37 b(detected)i(automatically)-8 b(.)64 -b(There)38 b(is)f(no)h(attempt)g(to)h(complete)g(the)e(compression)h -(op)s(eration,)150 2495 y(nor)f(to)h Fi(fflush)e Fj(the)h(compressed)g -(\014le.)62 b(Y)-8 b(ou)37 b(can)g(force)h(this)g(b)s(eha)m(viour)e(to) -i(happ)s(en)e(ev)m(en)i(in)f(the)150 2604 y(case)31 b(of)g(no)f(error,) -h(b)m(y)f(passing)g(a)h(nonzero)g(v)-5 b(alue)30 b(to)i -Fi(abandon)p Fj(.)150 2761 y(If)h Fi(nbytes_in)d Fj(is)j(non-n)m(ull,)g -Fi(*nbytes_in)d Fj(will)j(b)s(e)f(set)i(to)f(b)s(e)g(the)g(total)h(v)m -(olume)g(of)f(uncompressed)150 2871 y(data)k(handled.)58 -b(Similarly)-8 b(,)38 b Fi(nbytes_out)c Fj(will)j(b)s(e)f(set)h(to)g -(the)g(total)h(v)m(olume)g(of)e(compressed)h(data)150 -2980 y(written.)i(F)-8 b(or)24 b(compatibilit)m(y)i(with)d(older)h(v)m -(ersions)g(of)g(the)g(library)-8 b(,)25 b Fi(BZ2_bzWriteClose)19 -b Fj(only)24 b(yields)150 3090 y(the)39 b(lo)m(w)m(er)i(32)e(bits)g(of) -g(these)g(coun)m(ts.)67 b(Use)39 b Fi(BZ2_bzWriteClose64)34 -b Fj(if)39 b(y)m(ou)g(w)m(an)m(t)g(the)h(full)e(64)i(bit)150 -3199 y(coun)m(ts.)h(These)31 
b(t)m(w)m(o)h(functions)e(are)h(otherwise) -g(absolutely)g(iden)m(tical.)150 3356 y(P)m(ossible)h(assignmen)m(ts)e -(to)i Fi(bzerror)p Fj(:)572 3507 y Fi(BZ_SEQUENCE_ERROR)663 -3611 y Fj(if)e Fi(b)g Fj(w)m(as)h(op)s(ened)e(with)h -Fi(BZ2_bzReadOpen)572 3715 y(BZ_IO_ERROR)663 3819 y Fj(if)g(there)h(is) -f(an)g(error)h(writing)g(the)g(compressed)f(\014le)572 -3922 y Fi(BZ_OK)663 4026 y Fj(otherwise)150 4296 y Fe(3.4.8)63 -b(Handling)41 b(em)m(b)s(edded)g(compressed)h(data)e(streams)150 -4489 y Fj(The)i(high-lev)m(el)h(library)e(facilitates)k(use)c(of)h -Fi(bzip2)e Fj(data)i(streams)g(whic)m(h)f(form)g(some)h(part)g(of)g(a) -150 4598 y(surrounding,)29 b(larger)i(data)g(stream.)225 -4755 y Fh(\017)60 b Fj(F)-8 b(or)49 b(writing,)k(the)c(library)f(tak)m -(es)i(an)e(op)s(en)f(\014le)i(handle,)j(writes)d(compressed)f(data)h -(to)g(it,)330 4865 y Fi(fflush)p Fj(es)37 b(it)h(but)f(do)s(es)h(not)g -Fi(fclose)f Fj(it.)64 b(The)38 b(calling)h(application)g(can)f(write)g -(its)h(o)m(wn)f(data)330 4974 y(b)s(efore)30 b(and)g(after)h(the)g -(compressed)f(data)h(stream,)g(using)f(that)h(same)f(\014le)h(handle.) 
-225 5121 y Fh(\017)60 b Fj(Reading)33 b(is)g(more)g(complex,)h(and)e -(the)h(facilities)i(are)e(not)g(as)g(general)h(as)f(they)g(could)f(b)s -(e)h(since)330 5230 y(generalit)m(y)e(is)d(hard)g(to)h(reconcile)i -(with)d(e\016ciency)-8 b(.)42 b Fi(BZ2_bzRead)25 b Fj(reads)k(from)f -(the)h(compressed)330 5340 y(\014le)39 b(in)g(blo)s(c)m(ks)g(of)h(size) -f Fi(BZ_MAX_UNUSED)d Fj(b)m(ytes,)42 b(and)c(in)h(doing)g(so)g -(probably)g(will)g(o)m(v)m(ersho)s(ot)p eop -%%Page: 26 27 -26 26 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(26)330 299 y Fj(the)37 -b(logical)i(end)e(of)g(compressed)g(stream.)60 b(T)-8 -b(o)38 b(reco)m(v)m(er)i(this)c(data)i(once)f(decompression)g(has)330 -408 y(ended,)27 b(call)h Fi(BZ2_bzReadGetUnused)21 b -Fj(after)27 b(the)g(last)h(call)f(of)g Fi(BZ2_bzRead)d -Fj(\(the)j(one)g(returning)330 518 y Fi(BZ_STREAM_END)p -Fj(\))g(but)j(b)s(efore)g(calling)i Fi(BZ2_bzReadClose)p -Fj(.)150 702 y(This)j(mec)m(hanism)h(mak)m(es)g(it)h(easy)f(to)g -(decompress)g(m)m(ultiple)g Fi(bzip2)e Fj(streams)i(placed)g -(end-to-end.)150 812 y(As)67 b(the)h(end)g(of)f(one)i(stream,)77 -b(when)67 b Fi(BZ2_bzRead)e Fj(returns)j Fi(BZ_STREAM_END)p -Fj(,)73 b(call)c Fi(BZ2_)150 921 y(bzReadGetUnused)34 -b Fj(to)40 b(collect)h(the)e(un)m(used)e(data)i(\(cop)m(y)g(it)g(in)m -(to)h(y)m(our)f(o)m(wn)f(bu\013er)g(somewhere\).)150 -1031 y(That)52 b(data)h(forms)f(the)g(start)h(of)f(the)h(next)f -(compressed)g(stream.)107 b(T)-8 b(o)53 b(start)g(uncompressing)150 -1140 y(that)61 b(next)f(stream,)68 b(call)62 b Fi(BZ2_bzReadOpen)56 -b Fj(again,)68 b(feeding)61 b(in)e(the)i(un)m(used)e(data)h(via)h(the) -150 1250 y Fi(unused)p Fj(/)p Fi(nUnused)28 b Fj(parameters.)45 -b(Keep)32 b(doing)f(this)h(un)m(til)g Fi(BZ_STREAM_END)c -Fj(return)j(coincides)h(with)150 1360 y(the)j(ph)m(ysical)g(end)f(of)h -(\014le)f(\()p Fi(feof\(f\))p Fj(\).)52 b(In)34 b(this)h(situation)g -Fi(BZ2_bzReadGetUnused)30 b Fj(will)35 b(of)f(course)150 -1469 
y(return)c(no)g(data.)150 1626 y(This)24 b(should)g(giv)m(e)i -(some)f(feel)h(for)f(ho)m(w)g(the)g(high-lev)m(el)h(in)m(terface)h(can) -d(b)s(e)g(used.)38 b(If)25 b(y)m(ou)g(require)g(extra)150 -1736 y(\015exibilit)m(y)-8 b(,)32 b(y)m(ou'll)g(ha)m(v)m(e)f(to)g(bite) -g(the)g(bullet)g(and)e(get)j(to)f(grips)f(with)h(the)f(lo)m(w-lev)m(el) -k(in)m(terface.)150 1987 y Fe(3.4.9)63 b(Standard)40 -b(\014le-reading/writing)j(co)s(de)150 2179 y Fj(Here's)31 -b(ho)m(w)g(y)m(ou'd)f(write)h(data)g(to)g(a)g(compressed)f(\014le:)390 -2538 y Fi(FILE*)142 b(f;)390 2642 y(BZFILE*)46 b(b;)390 -2746 y(int)238 b(nBuf;)390 2849 y(char)190 b(buf[)46 -b(/*)i(whatever)d(size)i(you)g(like)f(*/)i(];)390 2953 -y(int)238 b(bzerror;)390 3057 y(int)g(nWritten;)390 3264 -y(f)47 b(=)h(fopen)e(\()i("myfile.bz2",)c("w")j(\);)390 -3368 y(if)g(\(!f\))g({)533 3472 y(/*)g(handle)f(error)h(*/)390 -3576 y(})390 3680 y(b)g(=)h(BZ2_bzWriteOpen)c(\()j(&bzerror,)e(f,)i(9)h -(\);)390 3783 y(if)f(\(bzerror)f(!=)h(BZ_OK\))f({)533 -3887 y(BZ2_bzWriteClose)e(\()j(b)g(\);)533 3991 y(/*)g(handle)f(error)h -(*/)390 4095 y(})390 4302 y(while)f(\()i(/*)f(condition)e(*/)i(\))h({) -533 4406 y(/*)f(get)g(data)g(to)g(write)f(into)h(buf,)g(and)g(set)g -(nBuf)f(appropriately)e(*/)533 4510 y(nWritten)i(=)h(BZ2_bzWrite)e(\()i -(&bzerror,)f(b,)h(buf,)f(nBuf)h(\);)533 4614 y(if)g(\(bzerror)f(==)h -(BZ_IO_ERROR\))e({)676 4717 y(BZ2_bzWriteClose)f(\()j(&bzerror,)e(b)j -(\);)676 4821 y(/*)g(handle)e(error)g(*/)533 4925 y(})390 -5029 y(})390 5236 y(BZ2_bzWriteClose)d(\()48 b(&bzerror,)d(b)j(\);)390 -5340 y(if)f(\(bzerror)f(==)h(BZ_IO_ERROR\))d({)p eop -%%Page: 27 28 -27 27 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(27)533 299 y Fi(/*)47 -b(handle)f(error)h(*/)390 403 y(})150 559 y Fj(And)29 -b(to)i(read)g(from)f(a)g(compressed)h(\014le:)390 711 -y Fi(FILE*)142 b(f;)390 814 y(BZFILE*)46 b(b;)390 918 -y(int)238 b(nBuf;)390 1022 y(char)190 b(buf[)46 b(/*)i(whatever)d(size) 
-i(you)g(like)f(*/)i(];)390 1126 y(int)238 b(bzerror;)390 -1229 y(int)g(nWritten;)390 1437 y(f)47 b(=)h(fopen)e(\()i -("myfile.bz2",)c("r")j(\);)390 1541 y(if)g(\(!f\))g({)533 -1645 y(/*)g(handle)f(error)h(*/)390 1748 y(})390 1852 -y(b)g(=)h(BZ2_bzReadOpen)c(\()j(&bzerror,)f(f,)h(0,)g(NULL,)f(0)i(\);) -390 1956 y(if)f(\(bzerror)f(!=)h(BZ_OK\))f({)533 2060 -y(BZ2_bzReadClose)e(\()j(&bzerror,)f(b)h(\);)533 2163 -y(/*)g(handle)f(error)h(*/)390 2267 y(})390 2475 y(bzerror)f(=)h -(BZ_OK;)390 2579 y(while)f(\(bzerror)g(==)h(BZ_OK)f(&&)i(/*)f -(arbitrary)e(other)h(conditions)f(*/\))i({)533 2682 y(nBuf)g(=)g -(BZ2_bzRead)e(\()j(&bzerror,)d(b,)i(buf,)g(/*)g(size)g(of)g(buf)g(*/)g -(\);)533 2786 y(if)g(\(bzerror)f(==)h(BZ_OK\))f({)676 -2890 y(/*)i(do)f(something)e(with)i(buf[0)f(..)h(nBuf-1])f(*/)533 -2994 y(})390 3097 y(})390 3201 y(if)h(\(bzerror)f(!=)h(BZ_STREAM_END\)) -d({)533 3305 y(BZ2_bzReadClose)g(\()j(&bzerror,)f(b)h(\);)533 -3409 y(/*)g(handle)f(error)h(*/)390 3513 y(})g(else)g({)533 -3616 y(BZ2_bzReadClose)d(\()j(&bzerror)f(\);)390 3720 -y(})150 3991 y Fk(3.5)68 b(Utilit)l(y)47 b(functions)150 -4264 y Fe(3.5.1)63 b Fd(BZ2_bzBuffToBuffCompress)533 -4451 y Fi(int)47 b(BZ2_bzBuffToBuffCompress\()41 b(char*)428 -b(dest,)1965 4555 y(unsigned)46 b(int*)g(destLen,)1965 -4658 y(char*)428 b(source,)1965 4762 y(unsigned)46 b(int)94 -b(sourceLen,)1965 4866 y(int)524 b(blockSize100k,)1965 -4970 y(int)g(verbosity,)1965 5074 y(int)g(workFactor)45 -b(\);)150 5230 y Fj(A)m(ttempts)32 b(to)f(compress)f(the)h(data)g(in)f -Fi(source[0)e(..)i(sourceLen-1])d Fj(in)m(to)k(the)g(destination)g -(bu\013er,)150 5340 y Fi(dest[0)e(..)g(*destLen-1])p -Fj(.)36 b(If)25 b(the)g(destination)h(bu\013er)e(is)h(big)f(enough,)i -Fi(*destLen)d Fj(is)i(set)g(to)h(the)f(size)p eop -%%Page: 28 29 -28 28 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(28)150 299 y Fj(of)27 -b(the)f(compressed)h(data,)h(and)d Fi(BZ_OK)g Fj(is)i(returned.)38 -b(If)27 
b(the)f(compressed)h(data)g(w)m(on't)g(\014t,)g -Fi(*destLen)150 408 y Fj(is)j(unc)m(hanged,)g(and)g Fi(BZ_OUTBUFF_FULL) -c Fj(is)31 b(returned.)150 565 y(Compression)22 b(in)h(this)f(manner)g -(is)h(a)f(one-shot)i(ev)m(en)m(t,)i(done)d(with)f(a)h(single)g(call)h -(to)f(this)g(function.)38 b(The)150 675 y(resulting)26 -b(compressed)g(data)f(is)h(a)g(complete)h Fi(bzip2)d -Fj(format)i(data)g(stream.)39 b(There)26 b(is)f(no)h(mec)m(hanism)150 -784 y(for)d(making)g(additional)h(calls)f(to)h(pro)m(vide)g(extra)f -(input)g(data.)38 b(If)23 b(y)m(ou)g(w)m(an)m(t)h(that)f(kind)f(of)i -(mec)m(hanism,)150 894 y(use)30 b(the)h(lo)m(w-lev)m(el)i(in)m -(terface.)150 1051 y(F)-8 b(or)31 b(the)g(meaning)f(of)h(parameters)g -Fi(blockSize100k)p Fj(,)c Fi(verbosity)h Fj(and)h Fi(workFactor)p -Fj(,)150 1160 y(see)i Fi(BZ2_bzCompressInit)p Fj(.)150 -1317 y(T)-8 b(o)37 b(guaran)m(tee)h(that)f(the)f(compressed)h(data)f -(will)h(\014t)f(in)g(its)h(bu\013er,)g(allo)s(cate)h(an)e(output)g -(bu\013er)g(of)150 1427 y(size)31 b(1\045)g(larger)g(than)f(the)h -(uncompressed)e(data,)i(plus)f(six)g(h)m(undred)f(extra)i(b)m(ytes.)150 -1584 y Fi(BZ2_bzBuffToBuffDecompre)o(ss)24 b Fj(will)31 -b(not)g(write)g(data)f(at)h(or)g(b)s(ey)m(ond)e Fi(dest[*destLen])p -Fj(,)e(ev)m(en)k(in)150 1693 y(case)g(of)g(bu\013er)e(o)m(v)m(er\015o)m -(w.)150 1850 y(P)m(ossible)j(return)e(v)-5 b(alues:)572 -2001 y Fi(BZ_CONFIG_ERROR)663 2105 y Fj(if)30 b(the)h(library)f(has)g -(b)s(een)g(mis-compiled)572 2209 y Fi(BZ_PARAM_ERROR)663 -2313 y Fj(if)g Fi(dest)f Fj(is)i Fi(NULL)e Fj(or)i Fi(destLen)d -Fj(is)i Fi(NULL)663 2416 y Fj(or)g Fi(blockSize100k)45 -b(<)i(1)30 b Fj(or)h Fi(blockSize100k)44 b(>)j(9)663 -2520 y Fj(or)30 b Fi(verbosity)46 b(<)h(0)30 b Fj(or)h -Fi(verbosity)45 b(>)i(4)663 2624 y Fj(or)30 b Fi(workFactor)45 -b(<)j(0)30 b Fj(or)g Fi(workFactor)45 b(>)j(250)572 2728 -y(BZ_MEM_ERROR)663 2831 y Fj(if)30 b(insu\016cien)m(t)h(memory)f(is)h -(a)m(v)-5 b(ailable)572 2935 y Fi(BZ_OUTBUFF_FULL)663 -3039 y 
Fj(if)30 b(the)h(size)g(of)g(the)f(compressed)h(data)f(exceeds)i -Fi(*destLen)572 3143 y(BZ_OK)663 3247 y Fj(otherwise)150 -3616 y Fe(3.5.2)63 b Fd(BZ2_bzBuffToBuffDecompress)533 -3803 y Fi(int)47 b(BZ2_bzBuffToBuffDecompres)o(s)42 b(\()47 -b(char*)428 b(dest,)2108 3906 y(unsigned)46 b(int*)g(destLen,)2108 -4010 y(char*)428 b(source,)2108 4114 y(unsigned)46 b(int)94 -b(sourceLen,)2108 4218 y(int)524 b(small,)2108 4322 y(int)g(verbosity) -46 b(\);)150 4478 y Fj(A)m(ttempts)22 b(to)g(decompress)f(the)h(data)f -(in)g Fi(source[0)28 b(..)i(sourceLen-1])18 b Fj(in)m(to)k(the)f -(destination)h(bu\013er,)150 4588 y Fi(dest[0)29 b(..)g(*destLen-1])p -Fj(.)61 b(If)37 b(the)h(destination)h(bu\013er)e(is)h(big)g(enough,)i -Fi(*destLen)35 b Fj(is)j(set)h(to)g(the)150 4698 y(size)e(of)e(the)h -(uncompressed)f(data,)j(and)c Fi(BZ_OK)h Fj(is)g(returned.)56 -b(If)36 b(the)g(compressed)f(data)h(w)m(on't)h(\014t,)150 -4807 y Fi(*destLen)28 b Fj(is)j(unc)m(hanged,)f(and)f -Fi(BZ_OUTBUFF_FULL)d Fj(is)31 b(returned.)150 4964 y -Fi(source)e Fj(is)h(assumed)f(to)j(hold)e(a)g(complete)i -Fi(bzip2)d Fj(format)i(data)g(stream.)150 5074 y Fi -(BZ2_bzBuffToBuffDecompre)o(ss)38 b Fj(tries)46 b(to)f(decompress)f -(the)h(en)m(tiret)m(y)i(of)d(the)h(stream)g(in)m(to)h(the)150 -5183 y(output)30 b(bu\013er.)150 5340 y(F)-8 b(or)31 -b(the)g(meaning)f(of)h(parameters)g Fi(small)e Fj(and)g -Fi(verbosity)p Fj(,)f(see)j Fi(BZ2_bzDecompressInit)p -Fj(.)p eop -%%Page: 29 30 -29 29 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(29)150 299 y Fj(Because)31 -b(the)g(compression)g(ratio)g(of)g(the)g(compressed)f(data)h(cannot)g -(b)s(e)f(kno)m(wn)g(in)g(adv)-5 b(ance,)31 b(there)150 -408 y(is)g(no)f(easy)i(w)m(a)m(y)f(to)h(guaran)m(tee)g(that)f(the)g -(output)g(bu\013er)f(will)h(b)s(e)f(big)h(enough.)42 -b(Y)-8 b(ou)30 b(ma)m(y)i(of)e(course)150 518 y(mak)m(e)j(arrangemen)m -(ts)h(in)e(y)m(our)h(co)s(de)f(to)i(record)f(the)g(size)g(of)g(the)g 
-(uncompressed)e(data,)j(but)e(suc)m(h)g(a)150 628 y(mec)m(hanism)e(is)h -(b)s(ey)m(ond)f(the)g(scop)s(e)h(of)f(this)h(library)-8 -b(.)150 784 y Fi(BZ2_bzBuffToBuffDecompre)o(ss)24 b Fj(will)31 -b(not)g(write)g(data)f(at)h(or)g(b)s(ey)m(ond)e Fi(dest[*destLen])p -Fj(,)e(ev)m(en)k(in)150 894 y(case)g(of)g(bu\013er)e(o)m(v)m(er\015o)m -(w.)150 1051 y(P)m(ossible)j(return)e(v)-5 b(alues:)572 -1202 y Fi(BZ_CONFIG_ERROR)663 1306 y Fj(if)30 b(the)h(library)f(has)g -(b)s(een)g(mis-compiled)572 1409 y Fi(BZ_PARAM_ERROR)663 -1513 y Fj(if)g Fi(dest)f Fj(is)i Fi(NULL)e Fj(or)i Fi(destLen)d -Fj(is)i Fi(NULL)663 1617 y Fj(or)g Fi(small)47 b(!=)g(0)g(&&)h(small)e -(!=)h(1)663 1721 y Fj(or)30 b Fi(verbosity)46 b(<)h(0)30 -b Fj(or)h Fi(verbosity)45 b(>)i(4)572 1825 y(BZ_MEM_ERROR)663 -1928 y Fj(if)30 b(insu\016cien)m(t)h(memory)f(is)h(a)m(v)-5 -b(ailable)572 2032 y Fi(BZ_OUTBUFF_FULL)663 2136 y Fj(if)30 -b(the)h(size)g(of)g(the)f(compressed)h(data)f(exceeds)i -Fi(*destLen)572 2240 y(BZ_DATA_ERROR)663 2343 y Fj(if)e(a)h(data)f(in)m -(tegrit)m(y)j(error)e(w)m(as)g(detected)g(in)g(the)f(compressed)h(data) -572 2447 y Fi(BZ_DATA_ERROR_MAGIC)663 2551 y Fj(if)f(the)h(compressed)f -(data)h(do)s(esn't)f(b)s(egin)g(with)g(the)h(righ)m(t)g(magic)h(b)m -(ytes)572 2655 y Fi(BZ_UNEXPECTED_EOF)663 2759 y Fj(if)e(the)h -(compressed)f(data)h(ends)e(unexp)s(ectedly)572 2862 -y Fi(BZ_OK)663 2966 y Fj(otherwise)150 3370 y Fk(3.6)68 -b Fc(zlib)43 b Fk(compatibilit)l(y)k(functions)150 3563 -y Fj(Y)-8 b(oshiok)j(a)32 b(Tsuneo)f(has)f(con)m(tributed)i(some)f -(functions)g(to)h(giv)m(e)h(b)s(etter)e Fi(zlib)f Fj(compatibilit)m(y) --8 b(.)46 b(These)150 3673 y(functions)37 b(are)g Fi(BZ2_bzopen)p -Fj(,)f Fi(BZ2_bzread)p Fj(,)g Fi(BZ2_bzwrite)p Fj(,)g -Fi(BZ2_bzflush)p Fj(,)f Fi(BZ2_bzclose)p Fj(,)h Fi(BZ2_)150 -3782 y(bzerror)22 b Fj(and)g Fi(BZ2_bzlibVersion)p Fj(.)34 -b(These)24 b(functions)f(are)h(not)g(\(y)m(et\))h(o\016cially)g(part)f -(of)g(the)f(library)-8 b(.)150 3892 y(If)30 
b(they)h(break,)f(y)m(ou)h -(get)h(to)f(k)m(eep)g(all)g(the)g(pieces.)41 b(Nev)m(ertheless,)33 -b(I)e(think)f(they)g(w)m(ork)h(ok.)390 4043 y Fi(typedef)46 -b(void)g(BZFILE;)390 4250 y(const)g(char)h(*)g(BZ2_bzlibVersion)d(\()j -(void)g(\);)150 4407 y Fj(Returns)30 b(a)h(string)f(indicating)h(the)g -(library)f(v)m(ersion.)390 4558 y Fi(BZFILE)46 b(*)i(BZ2_bzopen)92 -b(\()48 b(const)e(char)h(*path,)f(const)g(char)h(*mode)f(\);)390 -4662 y(BZFILE)g(*)i(BZ2_bzdopen)c(\()k(int)381 b(fd,)190 -b(const)46 b(char)h(*mode)f(\);)150 4819 y Fj(Op)s(ens)36 -b(a)i Fi(.bz2)e Fj(\014le)i(for)f(reading)h(or)f(writing,)j(using)d -(either)h(its)g(name)f(or)h(a)g(pre-existing)g(\014le)g(de-)150 -4928 y(scriptor.)j(Analogous)31 b(to)g Fi(fopen)e Fj(and)h -Fi(fdopen)p Fj(.)390 5079 y Fi(int)47 b(BZ2_bzread)93 -b(\()47 b(BZFILE*)f(b,)h(void*)f(buf,)h(int)g(len)g(\);)390 -5183 y(int)g(BZ2_bzwrite)e(\()i(BZFILE*)f(b,)h(void*)f(buf,)h(int)g -(len)g(\);)150 5340 y Fj(Reads/writes)31 b(data)g(from/to)g(a)g -(previously)f(op)s(ened)f Fi(BZFILE)p Fj(.)39 b(Analogous)31 -b(to)g Fi(fread)e Fj(and)g Fi(fwrite)p Fj(.)p eop -%%Page: 30 31 -30 30 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(30)390 299 y Fi(int)95 -b(BZ2_bzflush)44 b(\()k(BZFILE*)e(b)h(\);)390 403 y(void)g(BZ2_bzclose) -d(\()k(BZFILE*)e(b)h(\);)150 559 y Fj(Flushes/closes)29 -b(a)f Fi(BZFILE)p Fj(.)38 b Fi(BZ2_bzflush)25 b Fj(do)s(esn't)j -(actually)h(do)f(an)m(ything.)41 b(Analogous)28 b(to)h -Fi(fflush)150 669 y Fj(and)h Fi(fclose)p Fj(.)390 820 -y Fi(const)46 b(char)h(*)g(BZ2_bzerror)e(\()j(BZFILE)e(*b,)h(int)g -(*errnum)e(\))150 977 y Fj(Returns)29 b(a)h(string)g(describing)g(the)f -(more)i(recen)m(t)g(error)f(status)g(of)f Fi(b)p Fj(,)h(and)f(also)h -(sets)g Fi(*errnum)e Fj(to)i(its)150 1087 y(n)m(umerical)h(v)-5 -b(alue.)150 1411 y Fk(3.7)68 b(Using)46 b(the)f(library)g(in)g(a)g -Fc(stdio)p Fk(-free)f(en)l(vironmen)l(t)150 1723 y Fe(3.7.1)63 -b(Getting)41 b(rid)g(of)g Fd(stdio)150 1915 y 
Fj(In)g(a)g(deeply)h(em)m -(b)s(edded)e(application,)46 b(y)m(ou)41 b(migh)m(t)i(w)m(an)m(t)f(to)g -(use)f(just)g(the)h(memory-to-memory)150 2025 y(functions.)d(Y)-8 -b(ou)27 b(can)g(do)g(this)g(con)m(v)m(enien)m(tly)j(b)m(y)d(compiling)h -(the)f(library)g(with)g(prepro)s(cessor)g(sym)m(b)s(ol)150 -2135 y Fi(BZ_NO_STDIO)34 b Fj(de\014ned.)60 b(Doing)38 -b(this)f(giv)m(es)i(y)m(ou)e(a)h(library)f(con)m(taining)i(only)e(the)h -(follo)m(wing)g(eigh)m(t)150 2244 y(functions:)150 2401 -y Fi(BZ2_bzCompressInit)p Fj(,)26 b Fi(BZ2_bzCompress)p -Fj(,)g Fi(BZ2_bzCompressEnd)150 2511 y(BZ2_bzDecompressInit)p -Fj(,)f Fi(BZ2_bzDecompress)p Fj(,)h Fi(BZ2_bzDecompressEnd)150 -2620 y(BZ2_bzBuffToBuffCompress)o Fj(,)f Fi(BZ2_bzBuffToBuffDecompre)o -(ss)150 2777 y Fj(When)30 b(compiled)h(lik)m(e)g(this,)g(all)g -(functions)f(will)h(ignore)g Fi(verbosity)d Fj(settings.)150 -3053 y Fe(3.7.2)63 b(Critical)40 b(error)h(handling)150 -3246 y Fi(libbzip2)18 b Fj(con)m(tains)k(a)f(n)m(um)m(b)s(er)e(of)i(in) -m(ternal)h(assertion)f(c)m(hec)m(ks)h(whic)m(h)f(should,)g(needless)g -(to)h(sa)m(y)-8 b(,)23 b(nev)m(er)150 3355 y(b)s(e)35 -b(activ)-5 b(ated.)58 b(Nev)m(ertheless,)40 b(if)35 b(an)h(assertion)g -(should)f(fail,)i(b)s(eha)m(viour)f(dep)s(ends)e(on)h(whether)h(or)150 -3465 y(not)31 b(the)f(library)h(w)m(as)f(compiled)h(with)f -Fi(BZ_NO_STDIO)e Fj(set.)150 3622 y(F)-8 b(or)31 b(a)g(normal)f -(compile,)i(an)e(assertion)h(failure)f(yields)h(the)g(message)533 -3773 y Fi(bzip2/libbzip2:)44 b(internal)h(error)i(number)f(N.)533 -3877 y(This)h(is)g(a)g(bug)g(in)h(bzip2/libbzip2,)43 -b(1.0.2,)j(30-Dec-2001.)533 3980 y(Please)g(report)g(it)i(to)f(me)g -(at:)g(jseward@acm.org.)91 b(If)47 b(this)g(happened)533 -4084 y(when)g(you)g(were)f(using)h(some)f(program)g(which)h(uses)f -(libbzip2)g(as)h(a)533 4188 y(component,)e(you)i(should)f(also)h -(report)f(this)h(bug)f(to)i(the)f(author\(s\))533 4292 -y(of)g(that)g(program.)93 b(Please)46 b(make)h(an)g(effort)f(to)h -(report)g(this)f(bug;)533 4395 
y(timely)g(and)h(accurate)f(bug)h -(reports)e(eventually)g(lead)i(to)g(higher)533 4499 y(quality)f -(software.)93 b(Thanks.)h(Julian)46 b(Seward,)f(30)j(December)d(2001.) -150 4656 y Fj(where)33 b Fi(N)g Fj(is)g(some)g(error)h(co)s(de)f(n)m -(um)m(b)s(er.)48 b(If)32 b Fi(N)e(==)g(1007)p Fj(,)j(it)h(also)f(prin)m -(ts)g(some)h(extra)g(text)g(advising)150 4766 y(the)e(reader)f(that)h -(unreliable)f(memory)h(is)f(often)h(asso)s(ciated)g(with)f(in)m(ternal) -h(error)g(1007.)45 b(\(This)32 b(is)f(a)150 4875 y(frequen)m -(tly-observ)m(ed-phenomenon)g(with)f(v)m(ersions)h(1.0.0/1.0.1\).)150 -5032 y Fi(exit\(3\))d Fj(is)j(then)f(called.)150 5189 -y(F)-8 b(or)31 b(a)g Fi(stdio)p Fj(-free)e(library)-8 -b(,)31 b(assertion)h(failures)e(result)h(in)f(a)g(call)i(to)f(a)f -(function)h(declared)f(as:)533 5340 y Fi(extern)46 b(void)h -(bz_internal_error)c(\()k(int)g(errcode)f(\);)p eop -%%Page: 31 32 -31 31 bop 150 -116 a Fl(Chapter)30 b(3:)41 b(Programming)29 -b(with)g Fi(libbzip2)1881 b Fl(31)150 299 y Fj(The)30 -b(relev)-5 b(an)m(t)32 b(co)s(de)f(is)f(passed)g(as)g(a)h(parameter.)41 -b(Y)-8 b(ou)31 b(should)e(supply)g(suc)m(h)h(a)g(function.)150 -456 y(In)f(either)h(case,)g(once)g(an)f(assertion)h(failure)f(has)f(o)s -(ccurred,)i(an)m(y)f Fi(bz_stream)e Fj(records)i(in)m(v)m(olv)m(ed)i -(can)150 565 y(b)s(e)f(regarded)g(as)h(in)m(v)-5 b(alid.)41 -b(Y)-8 b(ou)30 b(should)f(not)i(attempt)h(to)f(resume)f(normal)h(op)s -(eration)g(with)f(them.)150 722 y(Y)-8 b(ou)27 b(ma)m(y)-8 -b(,)28 b(of)f(course,)h(c)m(hange)f(critical)i(error)e(handling)f(to)h -(suit)g(y)m(our)g(needs.)39 b(As)26 b(I)h(said)f(ab)s(o)m(v)m(e,)j -(crit-)150 832 y(ical)f(errors)g(indicate)g(bugs)e(in)h(the)h(library)f -(and)f(should)g(not)i(o)s(ccur.)40 b(All)27 b Fi(")p -Fj(normal)p Fi(")g Fj(error)g(situations)150 941 y(are)k(indicated)g -(via)f(error)h(return)f(co)s(des)h(from)f(functions,)g(and)g(can)g(b)s -(e)g(reco)m(v)m(ered)i(from.)150 1221 y Fk(3.8)68 b(Making)45 -b(a)g(Windo)l(ws)h(DLL)150 1414 y 
Fj(Ev)m(erything)31 -b(related)g(to)h(Windo)m(ws)e(has)f(b)s(een)h(con)m(tributed)h(b)m(y)f -(Y)-8 b(oshiok)j(a)32 b(Tsuneo)150 1523 y(\()p Fi -(QWF00133@niftyserve.or.jp)45 b Fj(/)53 b Fi(tsuneo-y@is.aist-nara.a)o -(c.jp)o Fj(\),)f(so)g(y)m(ou)h(should)e(send)150 1633 -y(y)m(our)31 b(queries)f(to)h(him)f(\(but)g(p)s(erhaps)f(Cc:)41 -b(me,)31 b Fi(jseward@acm.org)p Fj(\).)150 1790 y(My)42 -b(v)-5 b(ague)43 b(understanding)d(of)i(what)g(to)h(do)f(is:)64 -b(using)41 b(Visual)h(C)p Fi(++)f Fj(5.0,)46 b(op)s(en)c(the)g(pro)5 -b(ject)43 b(\014le)150 1899 y Fi(libbz2.dsp)p Fj(,)28 -b(and)h(build.)40 b(That's)31 b(all.)150 2056 y(If)39 -b(y)m(ou)g(can't)h(op)s(en)f(the)g(pro)5 b(ject)40 b(\014le)f(for)g -(some)h(reason,)i(mak)m(e)e(a)f(new)f(one,)k(naming)d(these)h(\014les:) -150 2166 y Fi(blocksort.c)p Fj(,)28 b Fi(bzlib.c)p Fj(,)g -Fi(compress.c)p Fj(,)g Fi(crctable.c)p Fj(,)g Fi(decompress.c)p -Fj(,)f Fi(huffman.c)p Fj(,)150 2275 y Fi(randtable.c)32 -b Fj(and)h Fi(libbz2.def)p Fj(.)51 b(Y)-8 b(ou)35 b(will)g(also)g(need) -f(to)i(name)e(the)h(header)g(\014les)f Fi(bzlib.h)f Fj(and)150 -2385 y Fi(bzlib_private.h)p Fj(.)150 2542 y(If)d(y)m(ou)h(don't)f(use)h -(V)m(C)p Fi(++)p Fj(,)e(y)m(ou)i(ma)m(y)g(need)f(to)h(de\014ne)f(the)h -(propro)s(cessor)f(sym)m(b)s(ol)g Fi(_WIN32)p Fj(.)150 -2698 y(Finally)-8 b(,)30 b Fi(dlltest.c)25 b Fj(is)j(a)g(sample)g -(program)h(using)e(the)i(DLL.)e(It)h(has)g(a)g(pro)5 -b(ject)29 b(\014le,)g Fi(dlltest.dsp)p Fj(.)150 2855 -y(If)h(y)m(ou)h(just)f(w)m(an)m(t)h(a)f(mak)m(e\014le)i(for)e(Visual)g -(C,)g(ha)m(v)m(e)i(a)e(lo)s(ok)h(at)g Fi(makefile.msc)p -Fj(.)150 3012 y(Be)j(a)m(w)m(are)i(that)f(if)g(y)m(ou)f(compile)i -Fi(bzip2)d Fj(itself)i(on)g(Win32,)h(y)m(ou)f(m)m(ust)f(set)i -Fi(BZ_UNIX)c Fj(to)j(0)g(and)f Fi(BZ_)150 3122 y(LCCWIN32)25 -b Fj(to)j(1,)g(in)f(the)h(\014le)f Fi(bzip2.c)p Fj(,)f(b)s(efore)h -(compiling.)41 b(Otherwise)27 b(the)h(resulting)g(binary)e(w)m(on't)150 -3231 y(w)m(ork)31 b(correctly)-8 b(.)150 3388 y(I)30 
-b(ha)m(v)m(en't)i(tried)f(an)m(y)f(of)h(this)f(stu\013)g(m)m(yself,)i -(but)d(it)i(all)g(lo)s(oks)g(plausible.)p eop -%%Page: 32 33 -32 32 bop 150 -116 a Fl(Chapter)30 b(4:)41 b(Miscellanea)2586 -b(32)150 299 y Fg(4)80 b(Miscellanea)150 549 y Fj(These)31 -b(are)f(just)g(some)h(random)f(though)m(ts)h(of)f(mine.)41 -b(Y)-8 b(our)30 b(mileage)i(ma)m(y)f(v)-5 b(ary)d(.)150 -826 y Fk(4.1)68 b(Limitations)47 b(of)e(the)g(compressed)g(\014le)h -(format)150 1018 y Fi(bzip2-1.0)p Fj(,)c Fi(0.9.5)e Fj(and)h -Fi(0.9.0)f Fj(use)i(exactly)h(the)f(same)g(\014le)g(format)g(as)f(the)h -(previous)g(v)m(ersion,)150 1128 y Fi(bzip2-0.1)p Fj(.)72 -b(This)41 b(decision)h(w)m(as)g(made)f(in)g(the)h(in)m(terests)h(of)f -(stabilit)m(y)-8 b(.)76 b(Creating)43 b(y)m(et)g(another)150 -1238 y(incompatible)i(compressed)g(\014le)g(format)g(w)m(ould)f(create) -i(further)e(confusion)h(and)e(disruption)h(for)150 1347 -y(users.)150 1504 y(Nev)m(ertheless,)30 b(this)d(is)f(not)h(a)g -(painless)g(decision.)40 b(Dev)m(elopmen)m(t)28 b(w)m(ork)f(since)h -(the)f(release)h(of)f Fi(bzip2-)150 1614 y(0.1)g Fj(in)h(August)g(1997) -i(has)d(sho)m(wn)h(complexities)i(in)e(the)g(\014le)g(format)h(whic)m -(h)f(slo)m(w)h(do)m(wn)e(decompres-)150 1723 y(sion)j(and,)g(in)g -(retrosp)s(ect,)i(are)f(unnecessary)-8 b(.)41 b(These)30 -b(are:)225 1880 y Fh(\017)60 b Fj(The)33 b(run-length)g(enco)s(der,)h -(whic)m(h)f(is)g(the)h(\014rst)f(of)g(the)h(compression)f -(transformations,)i(is)e(en-)330 1990 y(tirely)h(irrelev)-5 -b(an)m(t.)48 b(The)32 b(original)i(purp)s(ose)d(w)m(as)i(to)g(protect)h -(the)f(sorting)g(algorithm)g(from)g(the)330 2099 y(v)m(ery)26 -b(w)m(orst)g(case)h(input:)37 b(a)26 b(string)g(of)f(rep)s(eated)h(sym) -m(b)s(ols.)39 b(But)25 b(algorithm)h(steps)g(Q6a)g(and)f(Q6b)330 -2209 y(in)j(the)g(original)i(Burro)m(ws-Wheeler)e(tec)m(hnical)i(rep)s -(ort)f(\(SR)m(C-124\))h(sho)m(w)e(ho)m(w)g(rep)s(eats)g(can)h(b)s(e)330 -2318 y(handled)g(without)i(di\016cult)m(y)g(in)f(blo)s(c)m(k)h -(sorting.)225 2451 y 
Fh(\017)60 b Fj(The)27 b(randomisation)h(mec)m -(hanism)f(do)s(esn't)g(really)i(need)e(to)h(b)s(e)e(there.)41 -b(Udi)27 b(Man)m(b)s(er)g(and)f(Gene)330 2561 y(My)m(ers)31 -b(published)d(a)j(su\016x)e(arra)m(y)h(construction)i(algorithm)f(a)f -(few)g(y)m(ears)h(bac)m(k,)g(whic)m(h)e(can)i(b)s(e)330 -2670 y(emplo)m(y)m(ed)26 b(to)g(sort)f(an)m(y)g(blo)s(c)m(k,)i(no)e -(matter)h(ho)m(w)f(rep)s(etitiv)m(e,)j(in)c(O\(N)i(log)g(N\))f(time.)40 -b(Subsequen)m(t)330 2780 y(w)m(ork)24 b(b)m(y)f(Kunihik)m(o)g(Sadak)-5 -b(ane)23 b(has)g(pro)s(duced)e(a)j(deriv)-5 b(ativ)m(e)25 -b(O\(N)f(\(log)g(N\))p Fi(^)p Fj(2\))h(algorithm)f(whic)m(h)330 -2890 y(usually)30 b(outp)s(erforms)g(the)g(Man)m(b)s(er-My)m(ers)i -(algorithm.)330 3023 y(I)f(could)f(ha)m(v)m(e)i(c)m(hanged)f(to)g -(Sadak)-5 b(ane's)30 b(algorithm,)i(but)e(I)h(\014nd)e(it)i(to)g(b)s(e) -f(slo)m(w)m(er)i(than)e Fi(bzip2)p Fj('s)330 3132 y(existing)39 -b(algorithm)f(for)g(most)g(inputs,)h(and)d(the)i(randomisation)g(mec)m -(hanism)g(protects)h(ade-)330 3242 y(quately)34 b(against)g(bad)f -(cases.)50 b(I)33 b(didn't)g(think)g(it)h(w)m(as)f(a)h(go)s(o)s(d)f -(tradeo\013)h(to)g(mak)m(e.)50 b(P)m(artly)35 b(this)330 -3351 y(is)40 b(due)f(to)i(the)f(fact)h(that)f(I)g(w)m(as)g(not)g(\015o) -s(o)s(ded)f(with)g(email)i(complain)m(ts)g(ab)s(out)e -Fi(bzip2-0.1)p Fj('s)330 3461 y(p)s(erformance)30 b(on)g(rep)s(etitiv)m -(e)j(data,)e(so)f(p)s(erhaps)f(it)i(isn't)g(a)f(problem)g(for)h(real)g -(inputs.)330 3594 y(Probably)g(the)h(b)s(est)f(long-term)i(solution,)f -(and)f(the)g(one)h(I)f(ha)m(v)m(e)i(incorp)s(orated)e(in)m(to)i(0.9.5)g -(and)330 3704 y(ab)s(o)m(v)m(e,)41 b(is)d(to)h(use)f(the)h(existing)g -(sorting)g(algorithm)g(initially)-8 b(,)42 b(and)37 b(fall)i(bac)m(k)g -(to)f(a)h(O\(N)f(\(log)330 3813 y(N\))p Fi(^)p Fj(2\))31 -b(algorithm)h(if)e(the)h(standard)e(algorithm)j(gets)f(in)m(to)h -(di\016culties.)225 3946 y Fh(\017)60 b Fj(The)29 b(compressed)g -(\014le)g(format)h(w)m(as)f(nev)m(er)g(designed)g(to)h(b)s(e)e(handled) 
-g(b)m(y)h(a)g(library)-8 b(,)30 b(and)e(I)h(ha)m(v)m(e)330 -4056 y(had)e(to)h(jump)e(though)h(some)h(ho)s(ops)e(to)i(pro)s(duce)f -(an)g(e\016cien)m(t)i(implemen)m(tation)g(of)e(decompres-)330 -4165 y(sion.)39 b(It's)24 b(a)h(bit)f(hairy)-8 b(.)39 -b(T)-8 b(ry)24 b(passing)g Fi(decompress.c)d Fj(through)i(the)i(C)e -(prepro)s(cessor)i(and)e(y)m(ou'll)330 4275 y(see)31 -b(what)e(I)h(mean.)41 b(Muc)m(h)30 b(of)g(this)g(complexit)m(y)i(could) -e(ha)m(v)m(e)g(b)s(een)g(a)m(v)m(oided)h(if)f(the)g(compressed)330 -4384 y(size)h(of)g(eac)m(h)g(blo)s(c)m(k)g(of)g(data)f(w)m(as)h -(recorded)f(in)h(the)f(data)h(stream.)225 4517 y Fh(\017)60 -b Fj(An)29 b(Adler-32)j(c)m(hec)m(ksum,)f(rather)g(than)f(a)g(CR)m(C32) -h(c)m(hec)m(ksum,)h(w)m(ould)e(b)s(e)g(faster)h(to)g(compute.)150 -4698 y(It)45 b(w)m(ould)g(b)s(e)f(fair)h(to)h(sa)m(y)f(that)g(the)h -Fi(bzip2)d Fj(format)i(w)m(as)g(frozen)g(b)s(efore)g(I)g(prop)s(erly)f -(and)g(fully)150 4807 y(understo)s(o)s(d)29 b(the)i(p)s(erformance)f -(consequences)h(of)f(doing)h(so.)150 4964 y(Impro)m(v)m(emen)m(ts)25 -b(whic)m(h)d(I)i(w)m(as)f(able)g(to)h(incorp)s(orate)g(in)m(to)g -(0.9.0,)j(despite)d(using)e(the)i(same)f(\014le)g(format,)150 -5074 y(are:)225 5230 y Fh(\017)60 b Fj(Single)30 b(arra)m(y)g(implemen) -m(tation)i(of)d(the)h(in)m(v)m(erse)h(BWT.)f(This)f(signi\014can)m(tly) -i(sp)s(eeds)e(up)f(decom-)330 5340 y(pression,)i(presumably)f(b)s -(ecause)i(it)g(reduces)f(the)h(n)m(um)m(b)s(er)e(of)i(cac)m(he)g -(misses.)p eop -%%Page: 33 34 -33 33 bop 150 -116 a Fl(Chapter)30 b(4:)41 b(Miscellanea)2586 -b(33)225 299 y Fh(\017)60 b Fj(F)-8 b(aster)25 b(in)m(v)m(erse)f(MTF)g -(transform)f(for)g(large)i(MTF)f(v)-5 b(alues.)38 b(The)23 -b(new)g(implemen)m(tation)i(is)f(based)330 408 y(on)30 -b(the)h(notion)g(of)f(sliding)h(blo)s(c)m(ks)g(of)f(v)-5 -b(alues.)225 544 y Fh(\017)60 b Fi(bzip2-0.9.0)23 b Fj(no)m(w)j(reads)g -(and)f(writes)h(\014les)g(with)g Fi(fread)f Fj(and)g -Fi(fwrite)p Fj(;)h(v)m(ersion)h(0.1)g(used)e Fi(putc)330 -653 y 
Fj(and)30 b Fi(getc)p Fj(.)39 b(Duh!)g(W)-8 b(ell,)32 -b(y)m(ou)f(liv)m(e)g(and)f(learn.)150 836 y(F)-8 b(urther)28 -b(ahead,)h(it)g(w)m(ould)f(b)s(e)g(nice)g(to)i(b)s(e)d(able)i(to)g(do)f -(random)f(access)j(in)m(to)f(\014les.)40 b(This)28 b(will)h(require)150 -945 y(some)i(careful)g(design)f(of)g(compressed)h(\014le)f(formats.)150 -1227 y Fk(4.2)68 b(P)l(ortabilit)l(y)47 b(issues)150 -1419 y Fj(After)33 b(some)h(consideration,)h(I)e(ha)m(v)m(e)h(decided)f -(not)g(to)h(use)f(GNU)g Fi(autoconf)e Fj(to)j(con\014gure)f(0.9.5)i(or) -150 1529 y(1.0.)150 1686 y Fi(autoconf)p Fj(,)30 b(admirable)i(and)e(w) -m(onderful)h(though)h(it)g(is,)h(mainly)e(assists)h(with)g(p)s -(ortabilit)m(y)g(problems)150 1795 y(b)s(et)m(w)m(een)46 -b(Unix-lik)m(e)h(platforms.)85 b(But)45 b Fi(bzip2)f -Fj(do)s(esn't)h(ha)m(v)m(e)h(m)m(uc)m(h)g(in)f(the)g(w)m(a)m(y)h(of)g -(p)s(ortabilit)m(y)150 1905 y(problems)21 b(on)h(Unix;)j(most)d(of)g -(the)g(di\016culties)g(app)s(ear)f(when)g(p)s(orting)h(to)g(the)h(Mac,) -h(or)e(to)h(Microsoft's)150 2015 y(op)s(erating)j(systems.)40 -b Fi(autoconf)23 b Fj(do)s(esn't)j(help)f(in)g(those)i(cases,)g(and)e -(brings)g(in)h(a)g(whole)f(load)i(of)e(new)150 2124 y(complexit)m(y)-8 -b(.)150 2281 y(Most)34 b(p)s(eople)e(should)g(b)s(e)g(able)h(to)g -(compile)h(the)f(library)f(and)g(program)h(under)e(Unix)h(straigh)m(t)i -(out-)150 2391 y(of-the-b)s(o)m(x,)e(so)e(to)i(sp)s(eak,)e(esp)s -(ecially)h(if)f(y)m(ou)h(ha)m(v)m(e)g(a)g(v)m(ersion)g(of)g(GNU)g(C)e -(a)m(v)-5 b(ailable.)150 2547 y(There)31 b(are)h(a)f(couple)g(of)g -Fi(__inline__)e Fj(directiv)m(es)k(in)d(the)i(co)s(de.)42 -b(GNU)32 b(C)f(\()p Fi(gcc)p Fj(\))f(should)g(b)s(e)h(able)g(to)150 -2657 y(handle)23 b(them.)38 b(If)23 b(y)m(ou're)h(not)g(using)f(GNU)h -(C,)f(y)m(our)h(C)e(compiler)j(shouldn't)d(see)i(them)g(at)g(all.)39 -b(If)23 b(y)m(our)150 2767 y(compiler)30 b(do)s(es,)f(for)f(some)h -(reason,)h(see)f(them)g(and)f(do)s(esn't)g(lik)m(e)i(them,)g(just)e -Fi(#define)f(__inline__)150 2876 y Fj(to)37 
b(b)s(e)f -Fi(/*)29 b(*/)p Fj(.)58 b(One)36 b(easy)h(w)m(a)m(y)f(to)i(do)e(this)g -(is)g(to)h(compile)g(with)f(the)h(\015ag)f Fi(-D__inline__=)p -Fj(,)e(whic)m(h)150 2986 y(should)29 b(b)s(e)h(understo)s(o)s(d)f(b)m -(y)h(most)h(Unix)f(compilers.)150 3143 y(If)k(y)m(ou)g(still)h(ha)m(v)m -(e)g(di\016culties,)h(try)e(compiling)g(with)g(the)g(macro)h -Fi(BZ_STRICT_ANSI)30 b Fj(de\014ned.)50 b(This)150 3252 -y(should)27 b(enable)i(y)m(ou)g(to)g(build)e(the)i(library)f(in)g(a)g -(strictly)i(ANSI)e(complian)m(t)i(en)m(vironmen)m(t.)41 -b(Building)150 3362 y(the)22 b(program)h(itself)g(lik)m(e)g(this)f(is)g -(dangerous)g(and)f(not)h(supp)s(orted,)h(since)f(y)m(ou)h(remo)m(v)m(e) -h Fi(bzip2)p Fj('s)c(c)m(hec)m(ks)150 3471 y(against)27 -b(compressing)g(directories,)i(sym)m(b)s(olic)d(links,)h(devices,)h -(and)e(other)h(not-really-a-\014le)i(en)m(tities.)150 -3581 y(This)h(could)g(cause)h(\014lesystem)g(corruption!)150 -3738 y(One)c(other)h(thing:)40 b(if)27 b(y)m(ou)h(create)h(a)e -Fi(bzip2)f Fj(binary)h(for)g(public)g(distribution,)h(please)g(try)g -(and)e(link)h(it)150 3847 y(statically)g(\()p Fi(gcc)j(-s)p -Fj(\).)39 b(This)24 b(a)m(v)m(oids)j(all)e(sorts)h(of)f(library-v)m -(ersion)h(issues)e(that)i(others)g(ma)m(y)f(encoun)m(ter)150 -3957 y(later)32 b(on.)150 4114 y(If)d(y)m(ou)g(build)e -Fi(bzip2)h Fj(on)g(Win32,)i(y)m(ou)f(m)m(ust)g(set)h -Fi(BZ_UNIX)c Fj(to)k(0)f(and)f Fi(BZ_LCCWIN32)e Fj(to)k(1,)f(in)g(the)g -(\014le)150 4223 y Fi(bzip2.c)p Fj(,)g(b)s(efore)h(compiling.)41 -b(Otherwise)31 b(the)g(resulting)g(binary)f(w)m(on't)h(w)m(ork)g -(correctly)-8 b(.)150 4505 y Fk(4.3)68 b(Rep)t(orting)46 -b(bugs)150 4698 y Fj(I)23 b(tried)g(prett)m(y)h(hard)e(to)i(mak)m(e)f -(sure)g Fi(bzip2)e Fj(is)i(bug)f(free,)j(b)s(oth)d(b)m(y)h(design)g -(and)f(b)m(y)h(testing.)39 b(Hop)s(efully)150 4807 y(y)m(ou'll)31 -b(nev)m(er)g(need)g(to)g(read)f(this)h(section)g(for)g(real.)150 -4964 y(Nev)m(ertheless,)j(if)d Fi(bzip2)f Fj(dies)h(with)g(a)g(segmen)m 
-(tation)j(fault,)e(a)f(bus)f(error)i(or)f(an)g(in)m(ternal)h(assertion) -150 5074 y(failure,)j(it)f(will)f(ask)h(y)m(ou)g(to)g(email)g(me)g(a)f -(bug)g(rep)s(ort.)50 b(Exp)s(erience)33 b(with)g(v)m(ersion)i(0.1)f -(sho)m(ws)f(that)150 5183 y(almost)e(all)g(these)g(problems)f(can)h(b)s -(e)e(traced)j(to)f(either)g(compiler)g(bugs)f(or)h(hardw)m(are)f -(problems.)225 5340 y Fh(\017)60 b Fj(Recompile)22 b(the)f(program)f -(with)h(no)f(optimisation,)k(and)c(see)h(if)g(it)g(w)m(orks.)38 -b(And/or)20 b(try)g(a)h(di\013eren)m(t)p eop -%%Page: 34 35 -34 34 bop 150 -116 a Fl(Chapter)30 b(4:)41 b(Miscellanea)2586 -b(34)330 299 y Fj(compiler.)74 b(I)41 b(heard)f(all)i(sorts)f(of)g -(stories)i(ab)s(out)d(v)-5 b(arious)41 b(\015a)m(v)m(ours)g(of)h(GNU)f -(C)g(\(and)f(other)330 408 y(compilers\))45 b(generating)g(bad)e(co)s -(de)g(for)h Fi(bzip2)p Fj(,)i(and)d(I'v)m(e)i(run)d(across)i(t)m(w)m(o) -h(suc)m(h)f(examples)330 518 y(m)m(yself.)330 716 y(2.7.X)i(v)m -(ersions)f(of)g(GNU)h(C)e(are)h(kno)m(wn)g(to)g(generate)i(bad)d(co)s -(de)h(from)f(time)i(to)g(time,)j(at)330 825 y(high)32 -b(optimisation)h(lev)m(els.)47 b(If)31 b(y)m(ou)i(get)g(problems,)f -(try)g(using)f(the)i(\015ags)f Fi(-O2)f(-fomit-frame-)330 -935 y(pointer)d(-fno-strength-reduce)p Fj(.)36 b(Y)-8 -b(ou)30 b(should)f(sp)s(eci\014cally)i Fb(not)40 b Fj(use)30 -b Fi(-funroll-loops)p Fj(.)330 1132 y(Y)-8 b(ou)36 b(ma)m(y)g(notice)i -(that)e(the)g(Mak)m(e\014le)i(runs)d(six)g(tests)i(as)f(part)g(of)g -(the)h(build)e(pro)s(cess.)57 b(If)36 b(the)330 1242 -y(program)42 b(passes)g(all)h(of)f(these,)k(it's)d(a)f(prett)m(y)i(go)s -(o)s(d)e(\(but)g(not)g(100\045\))i(indication)f(that)g(the)330 -1352 y(compiler)31 b(has)f(done)g(its)h(job)f(correctly)-8 -b(.)225 1549 y Fh(\017)60 b Fj(If)32 b Fi(bzip2)e Fj(crashes)j -(randomly)-8 b(,)32 b(and)f(the)i(crashes)f(are)g(not)h(rep)s(eatable,) -g(y)m(ou)g(ma)m(y)f(ha)m(v)m(e)h(a)f(\015aky)330 1659 -y(memory)37 b(subsystem.)57 b Fi(bzip2)35 b Fj(really)i(hammers)f(y)m 
-(our)h(memory)f(hierarc)m(h)m(y)-8 b(,)39 b(and)d(if)g(it's)h(a)g(bit) -330 1768 y(marginal,)c(y)m(ou)g(ma)m(y)g(get)g(these)g(problems.)46 -b(Ditto)34 b(if)e(y)m(our)h(disk)f(or)g(I/O)h(subsystem)f(is)g(slo)m -(wly)330 1878 y(failing.)41 b(Y)-8 b(up,)30 b(this)g(really)i(do)s(es)e -(happ)s(en.)330 2075 y(T)-8 b(ry)27 b(using)f(a)h(di\013eren)m(t)h(mac) -m(hine)f(of)g(the)g(same)g(t)m(yp)s(e,)h(and)e(see)h(if)g(y)m(ou)g(can) -g(rep)s(eat)g(the)g(problem.)225 2273 y Fh(\017)60 b -Fj(This)21 b(isn't)h(really)g(a)g(bug,)h(but)d(...)39 -b(If)21 b Fi(bzip2)f Fj(tells)i(y)m(ou)g(y)m(our)g(\014le)f(is)h -(corrupted)f(on)g(decompression,)330 2383 y(and)g(y)m(ou)h(obtained)g -(the)h(\014le)f(via)g(FTP)-8 b(,)23 b(there)f(is)g(a)g(p)s(ossibilit)m -(y)h(that)f(y)m(ou)h(forgot)g(to)g(tell)g(FTP)f(to)h(do)330 -2492 y(a)31 b(binary)g(mo)s(de)g(transfer.)43 b(That)31 -b(absolutely)h(will)g(cause)f(the)h(\014le)f(to)h(b)s(e)e -(non-decompressible.)330 2602 y(Y)-8 b(ou'll)31 b(ha)m(v)m(e)g(to)h -(transfer)e(it)h(again.)150 2847 y(If)e(y)m(ou'v)m(e)i(incorp)s(orated) -e Fi(libbzip2)e Fj(in)m(to)k(y)m(our)e(o)m(wn)g(program)h(and)e(are)i -(getting)h(problems,)e(please,)150 2956 y(please,)e(please,)g(c)m(hec)m -(k)f(that)g(the)f(parameters)h(y)m(ou)f(are)h(passing)e(in)h(calls)h -(to)f(the)h(library)-8 b(,)26 b(are)g(correct,)150 3066 -y(and)g(in)g(accordance)i(with)f(what)f(the)h(do)s(cumen)m(tation)h(sa) -m(ys)f(is)f(allo)m(w)m(able.)42 b(I)27 b(ha)m(v)m(e)g(tried)h(to)f(mak) -m(e)h(the)150 3175 y(library)i(robust)g(against)h(suc)m(h)g(problems,)f -(but)g(I'm)g(sure)g(I)g(ha)m(v)m(en't)i(succeeded.)150 -3332 y(Finally)-8 b(,)33 b(if)f(the)h(ab)s(o)m(v)m(e)g(commen)m(ts)g -(don't)f(help,)g(y)m(ou'll)h(ha)m(v)m(e)g(to)g(send)e(me)h(a)g(bug)f -(rep)s(ort.)46 b(No)m(w,)33 b(it's)150 3442 y(just)d(amazing)h(ho)m(w)f -(man)m(y)h(p)s(eople)f(will)h(send)e(me)i(a)f(bug)g(rep)s(ort)h(sa)m -(ying)g(something)g(lik)m(e)481 3593 y(bzip2)f(crashed)g(with)h(segmen) 
-m(tation)h(fault)f(on)f(m)m(y)g(mac)m(hine)150 3750 y(and)h(absolutely) -h(nothing)g(else.)45 b(Needless)33 b(to)f(sa)m(y)-8 b(,)33 -b(a)f(suc)m(h)f(a)h(rep)s(ort)g(is)f Fb(total)5 b(ly,)36 -b(utterly,)e(c)-5 b(ompletely)150 3859 y(and)27 b(c)-5 -b(ompr)g(ehensively)29 b(100\045)f(useless;)g(a)f(waste)g(of)g(your)g -(time,)h(my)e(time,)i(and)f(net)g(b)-5 b(andwidth)p Fj(.)41 -b(With)150 3969 y(no)30 b(details)i(at)f(all,)g(there's)g(no)f(w)m(a)m -(y)h(I)g(can)f(p)s(ossibly)g(b)s(egin)g(to)h(\014gure)f(out)h(what)f -(the)g(problem)h(is.)150 4126 y(The)e(rules)f(of)h(the)g(game)h(are:)40 -b(facts,)30 b(facts,)g(facts.)41 b(Don't)29 b(omit)h(them)f(b)s(ecause) -f Fi(")p Fj(oh,)h(they)g(w)m(on't)h(b)s(e)150 4235 y(relev)-5 -b(an)m(t)p Fi(")p Fj(.)42 b(A)m(t)31 b(the)g(bare)f(minim)m(um:)481 -4386 y(Mac)m(hine)h(t)m(yp)s(e.)62 b(Op)s(erating)30 -b(system)h(v)m(ersion.)481 4490 y(Exact)g(v)m(ersion)g(of)g -Fi(bzip2)d Fj(\(do)j Fi(bzip2)46 b(-V)p Fj(\).)481 4594 -y(Exact)31 b(v)m(ersion)g(of)g(the)f(compiler)h(used.)481 -4698 y(Flags)g(passed)f(to)h(the)g(compiler.)150 4854 -y(Ho)m(w)m(ev)m(er,)f(the)e(most)g(imp)s(ortan)m(t)f(single)h(thing)g -(that)f(will)h(help)f(me)g(is)g(the)h(\014le)f(that)h(y)m(ou)f(w)m(ere) -h(trying)150 4964 y(to)35 b(compress)f(or)g(decompress)g(at)g(the)h -(time)f(the)h(problem)e(happ)s(ened.)50 b(Without)34 -b(that,)i(m)m(y)e(abilit)m(y)150 5074 y(to)d(do)g(an)m(ything)f(more)h -(than)f(sp)s(eculate)h(ab)s(out)f(the)h(cause,)f(is)h(limited.)150 -5230 y(Please)h(remem)m(b)s(er)g(that)g(I)f(connect)h(to)g(the)g(In)m -(ternet)g(with)f(a)h(mo)s(dem,)f(so)g(y)m(ou)h(should)e(con)m(tact)k -(me)150 5340 y(b)s(efore)c(mailing)h(me)g(h)m(uge)f(\014les.)p -eop -%%Page: 35 36 -35 35 bop 150 -116 a Fl(Chapter)30 b(4:)41 b(Miscellanea)2586 -b(35)150 299 y Fk(4.4)68 b(Did)45 b(y)l(ou)g(get)h(the)f(righ)l(t)h -(pac)l(k)-7 b(age?)150 491 y Fi(bzip2)33 b Fj(is)h(a)h(resource)g(hog.) 
-54 b(It)35 b(soaks)f(up)g(large)h(amoun)m(ts)g(of)f(CPU)h(cycles)g(and) -f(memory)-8 b(.)53 b(Also,)36 b(it)150 601 y(giv)m(es)25 -b(v)m(ery)e(large)i(latencies.)40 b(In)23 b(the)g(w)m(orst)h(case,)i(y) -m(ou)d(can)h(feed)f(man)m(y)g(megab)m(ytes)i(of)e(uncompressed)150 -711 y(data)42 b(in)m(to)h(the)f(library)g(b)s(efore)f(getting)j(an)m(y) -e(compressed)g(output,)i(so)e(this)g(probably)f(rules)h(out)150 -820 y(applications)31 b(requiring)g(in)m(teractiv)m(e)i(b)s(eha)m -(viour.)150 977 y(These)j(aren't)h(faults)f(of)g(m)m(y)g(implemen)m -(tation,)j(I)d(hop)s(e,)h(but)f(more)g(an)g(in)m(trinsic)g(prop)s(ert)m -(y)g(of)h(the)150 1087 y(Burro)m(ws-Wheeler)31 b(transform)f -(\(unfortunately\).)41 b(Ma)m(yb)s(e)31 b(this)f(isn't)h(what)f(y)m(ou) -h(w)m(an)m(t.)150 1244 y(If)f(y)m(ou)g(w)m(an)m(t)h(a)f(compressor)g -(and/or)g(library)g(whic)m(h)g(is)g(faster,)h(uses)f(less)g(memory)g -(but)g(gets)h(prett)m(y)150 1353 y(go)s(o)s(d)f(compression,)g(and)g -(has)f(minimal)h(latency)-8 b(,)32 b(consider)e(Jean-loup)g(Gailly's)i -(and)d(Mark)h(Adler's)150 1463 y(w)m(ork,)h Fi(zlib-1.1.3)d -Fj(and)h Fi(gzip-1.2.4)p Fj(.)38 b(Lo)s(ok)30 b(for)h(them)f(at)150 -1620 y Fi(http://www.zlib.org)25 b Fj(and)30 b Fi(http://www.gzip.org) -25 b Fj(resp)s(ectiv)m(ely)-8 b(.)150 1776 y(F)g(or)30 -b(something)h(faster)f(and)e(ligh)m(ter)k(still,)f(y)m(ou)f(migh)m(t)g -(try)g(Markus)f(F)h(X)f(J)h(Ob)s(erh)m(umer's)e Fi(LZO)h -Fj(real-)150 1886 y(time)i(compression/decompression)h(library)-8 -b(,)31 b(at)150 1996 y Fi(http://wildsau.idv.uni-l)o(inz.)o(ac.a)o(t/m) -o(fx/l)o(zo.h)o(tml)o Fj(.)150 2152 y(If)37 b(y)m(ou)h(w)m(an)m(t)h(to) -f(use)f(the)h Fi(bzip2)e Fj(algorithms)j(to)f(compress)g(small)g(blo)s -(c)m(ks)f(of)h(data,)i(64k)e(b)m(ytes)h(or)150 2262 y(smaller,)h(for)e -(example)g(on)g(an)g(on-the-\015y)g(disk)f(compressor,)j(y)m(ou'd)e(b)s -(e)f(w)m(ell)i(advised)e(not)i(to)f(use)150 2372 y(this)k(library)-8 -b(.)74 b(Instead,)45 b(I'v)m(e)d(made)g(a)f(sp)s(ecial)h(library)g 
-(tuned)f(for)g(that)h(kind)f(of)h(use.)74 b(It's)42 b(part)150 -2481 y(of)d Fi(e2compr-0.40)p Fj(,)e(an)i(on-the-\015y)g(disk)f -(compressor)h(for)g(the)g(Lin)m(ux)e Fi(ext2)h Fj(\014lesystem.)66 -b(Lo)s(ok)38 b(at)150 2591 y Fi(http://www.netspace.net.)o(au/~)o(reit) -o(er/)o(e2co)o(mpr)p Fj(.)150 2880 y Fk(4.5)68 b(T)-11 -b(esting)150 3072 y Fj(A)30 b(record)h(of)f(the)h(tests)h(I'v)m(e)f -(done.)150 3229 y(First,)g(some)g(data)g(sets:)225 3386 -y Fh(\017)60 b Fj(B:)29 b(a)g(directory)h(con)m(taining)h(6001)f -(\014les,)g(one)f(for)g(ev)m(ery)h(length)g(in)e(the)i(range)f(0)g(to)h -(6000)h(b)m(ytes.)330 3496 y(The)f(\014les)h(con)m(tain)g(random)f(lo)m -(w)m(ercase)j(letters.)42 b(18.7)32 b(megab)m(ytes.)225 -3633 y Fh(\017)60 b Fj(H:)33 b(m)m(y)h(home)f(directory)h(tree.)50 -b(Do)s(cumen)m(ts,)34 b(source)f(co)s(de,)h(mail)g(\014les,)g -(compressed)f(data.)49 b(H)330 3743 y(con)m(tains)39 -b(B,)e(and)g(also)i(a)e(directory)i(of)f(\014les)g(designed)f(as)h(b)s -(oundary)e(cases)i(for)g(the)g(sorting;)330 3853 y(mostly)31 -b(v)m(ery)g(rep)s(etitiv)m(e,)i(nast)m(y)e(\014les.)40 -b(565)32 b(megab)m(ytes.)225 3990 y Fh(\017)60 b Fj(A:)40 -b(directory)i(tree)g(holding)e(v)-5 b(arious)41 b(applications)g(built) -g(from)f(source:)62 b Fi(egcs)p Fj(,)42 b Fi(gcc-2.8.1)p -Fj(,)330 4100 y(KDE,)30 b(GTK,)h(Octa)m(v)m(e,)j(etc.)42 -b(2200)32 b(megab)m(ytes.)150 4285 y(The)i(tests)h(conducted)f(are)h -(as)f(follo)m(ws.)53 b(Eac)m(h)34 b(test)i(means)d(compressing)i(\(a)f -(cop)m(y)h(of)10 b(\))35 b(eac)m(h)g(\014le)f(in)150 -4394 y(the)d(data)f(set,)i(decompressing)e(it)h(and)f(comparing)h(it)g -(against)g(the)g(original.)150 4551 y(First,)36 b(a)f(bunc)m(h)f(of)h -(tests)g(with)g(blo)s(c)m(k)g(sizes)g(and)f(in)m(ternal)i(bu\013er)e -(sizes)h(set)g(v)m(ery)h(small,)g(to)f(detect)150 4661 -y(an)m(y)h(problems)g(with)h(the)f(blo)s(c)m(king)h(and)f(bu\013ering)g -(mec)m(hanisms.)59 b(This)36 b(required)g(mo)s(difying)g(the)150 -4770 y(source)31 
b(co)s(de)f(so)h(as)f(to)i(try)e(to)i(break)e(it.)199 -4927 y(1.)61 b(Data)31 b(set)g(H,)g(with)f(bu\013er)g(size)h(of)f(1)h -(b)m(yte,)h(and)d(blo)s(c)m(k)i(size)g(of)g(23)g(b)m(ytes.)199 -5065 y(2.)61 b(Data)31 b(set)g(B,)f(bu\013er)g(sizes)h(1)g(b)m(yte,)g -(blo)s(c)m(k)g(size)g(1)g(b)m(yte.)199 5202 y(3.)61 b(As)30 -b(\(2\))h(but)f(small-mo)s(de)h(decompression.)199 5340 -y(4.)61 b(As)30 b(\(2\))h(with)g(blo)s(c)m(k)f(size)i(2)e(b)m(ytes.)p -eop -%%Page: 36 37 -36 36 bop 150 -116 a Fl(Chapter)30 b(4:)41 b(Miscellanea)2586 -b(36)199 299 y Fj(5.)61 b(As)30 b(\(2\))h(with)g(blo)s(c)m(k)f(size)i -(3)e(b)m(ytes.)199 431 y(6.)61 b(As)30 b(\(2\))h(with)g(blo)s(c)m(k)f -(size)i(4)e(b)m(ytes.)199 564 y(7.)61 b(As)30 b(\(2\))h(with)g(blo)s(c) -m(k)f(size)i(5)e(b)m(ytes.)199 697 y(8.)61 b(As)30 b(\(2\))h(with)g -(blo)s(c)m(k)f(size)i(6)e(b)m(ytes)h(and)f(small-mo)s(de)h -(decompression.)199 829 y(9.)61 b(H)31 b(with)f(bu\013er)f(size)j(of)e -(1)h(b)m(yte,)g(but)f(normal)g(blo)s(c)m(k)h(size)h(\(up)d(to)j(900000) -h(b)m(ytes\).)150 1009 y(Then)d(some)h(tests)g(with)f(unmo)s(di\014ed)f -(source)h(co)s(de.)199 1166 y(1.)61 b(H,)31 b(all)g(settings)g(normal.) -199 1299 y(2.)61 b(As)30 b(\(1\),)i(with)e(small-mo)s(de)g(decompress.) 
-199 1431 y(3.)61 b(H,)31 b(compress)f(with)g(\015ag)h -Fi(-1)p Fj(.)199 1564 y(4.)61 b(H,)31 b(compress)f(with)g(\015ag)h -Fi(-s)p Fj(,)f(decompress)g(with)g(\015ag)h Fi(-s)p Fj(.)199 -1697 y(5.)61 b(F)-8 b(orw)m(ards)31 b(compatibilit)m(y:)43 -b(H,)31 b Fi(bzip2-0.1pl2)d Fj(compressing,)j Fi(bzip2-0.9.5)c -Fj(decompressing,)330 1806 y(all)k(settings)h(normal.)199 -1939 y(6.)61 b(Bac)m(kw)m(ards)38 b(compatibilit)m(y:)59 -b(H,)39 b Fi(bzip2-0.9.5)c Fj(compressing,)41 b Fi(bzip2-0.1pl2)35 -b Fj(decompress-)330 2048 y(ing,)c(all)g(settings)g(normal.)199 -2181 y(7.)61 b(Bigger)31 b(tests:)42 b(A,)30 b(all)h(settings)h -(normal.)199 2314 y(8.)61 b(As)30 b(\(7\),)i(using)d(the)i(fallbac)m(k) -g(\(Sadak)-5 b(ane-lik)m(e\))33 b(sorting)e(algorithm.)199 -2446 y(9.)61 b(As)30 b(\(8\),)i(compress)e(with)g(\015ag)h -Fi(-1)p Fj(,)f(decompress)g(with)g(\015ag)h Fi(-s)p Fj(.)154 -2579 y(10.)61 b(H,)31 b(using)f(the)g(fallbac)m(k)i(sorting)f -(algorithm.)154 2711 y(11.)61 b(F)-8 b(orw)m(ards)31 -b(compatibilit)m(y:)44 b(A,)31 b Fi(bzip2-0.1pl2)d Fj(compressing,)k -Fi(bzip2-0.9.5)27 b Fj(decompressing,)330 2821 y(all)k(settings)h -(normal.)154 2954 y(12.)61 b(Bac)m(kw)m(ards)39 b(compatibilit)m(y:)59 -b(A,)39 b Fi(bzip2-0.9.5)c Fj(compressing,)41 b Fi(bzip2-0.1pl2)36 -b Fj(decompress-)330 3063 y(ing,)31 b(all)g(settings)g(normal.)154 -3196 y(13.)61 b(Misc)38 b(test:)57 b(ab)s(out)37 b(400)i(megab)m(ytes)g -(of)f Fi(.tar)f Fj(\014les)g(with)h Fi(bzip2)e Fj(compiled)i(with)g -(Chec)m(k)m(er)g(\(a)330 3305 y(memory)31 b(access)g(error)g(detector,) -h(lik)m(e)g(Purify\).)154 3438 y(14.)61 b(Misc)31 b(tests)h(to)f(mak)m -(e)g(sure)f(it)h(builds)e(and)h(runs)f(ok)h(on)h(non-Lin)m(ux/x86)f -(platforms.)150 3618 y(These)k(tests)g(w)m(ere)h(conducted)e(on)h(a)f -(225)i(MHz)f(IDT)g(WinChip)e(mac)m(hine,)j(running)d(Lin)m(ux)g -(2.0.36.)150 3728 y(They)c(represen)m(t)h(nearly)g(a)f(w)m(eek)h(of)f -(con)m(tin)m(uous)h(computation.)41 b(All)29 b(tests)g(completed)g -(successfully)-8 b(.)150 
4003 y Fk(4.6)68 b(F)-11 b(urther)44 -b(reading)150 4196 y Fi(bzip2)26 b Fj(is)h(not)h(researc)m(h)g(w)m -(ork,)g(in)f(the)h(sense)f(that)h(it)g(do)s(esn't)f(presen)m(t)h(an)m -(y)f(new)g(ideas.)40 b(Rather,)28 b(it's)150 4306 y(an)i(engineering)h -(exercise)h(based)e(on)g(existing)i(ideas.)150 4463 y(F)-8 -b(our)31 b(do)s(cumen)m(ts)f(describ)s(e)g(essen)m(tially)i(all)f(the)g -(ideas)f(b)s(ehind)f Fi(bzip2)p Fj(:)390 4614 y Fi(Michael)46 -b(Burrows)g(and)h(D.)g(J.)g(Wheeler:)485 4717 y("A)h(block-sorting)c -(lossless)h(data)i(compression)e(algorithm")533 4821 -y(10th)i(May)g(1994.)533 4925 y(Digital)f(SRC)h(Research)e(Report)i -(124.)533 5029 y(ftp://ftp.digital.com/pub)o(/DEC)o(/SR)o(C/re)o(sear)o -(ch-)o(repo)o(rts/)o(SRC)o(-124)o(.ps.)o(gz)533 5132 -y(If)g(you)g(have)g(trouble)f(finding)g(it,)g(try)h(searching)f(at)h -(the)533 5236 y(New)g(Zealand)f(Digital)g(Library,)f -(http://www.nzdl.org.)p eop -%%Page: 37 38 -37 37 bop 150 -116 a Fl(Chapter)30 b(4:)41 b(Miscellanea)2586 -b(37)390 299 y Fi(Daniel)46 b(S.)h(Hirschberg)e(and)i(Debra)g(A.)g -(LeLewer)485 403 y("Efficient)e(Decoding)h(of)h(Prefix)f(Codes")533 -506 y(Communications)e(of)j(the)g(ACM,)g(April)f(1990,)h(Vol)f(33,)h -(Number)f(4.)533 610 y(You)h(might)f(be)i(able)e(to)h(get)g(an)h -(electronic)d(copy)h(of)h(this)676 714 y(from)g(the)g(ACM)g(Digital)f -(Library.)390 922 y(David)g(J.)i(Wheeler)533 1025 y(Program)e(bred3.c)g -(and)h(accompanying)d(document)i(bred3.ps.)533 1129 y(This)h(contains)e -(the)i(idea)g(behind)f(the)h(multi-table)e(Huffman)533 -1233 y(coding)h(scheme.)533 1337 y(ftp://ftp.cl.cam.ac.uk/us)o(ers/)o -(djw)o(3/)390 1544 y(Jon)h(L.)g(Bentley)f(and)h(Robert)f(Sedgewick)485 -1648 y("Fast)h(Algorithms)e(for)i(Sorting)f(and)g(Searching)g(Strings") -533 1752 y(Available)f(from)i(Sedgewick's)e(web)i(page,)533 -1856 y(www.cs.princeton.edu/~rs)150 2012 y Fj(The)29 -b(follo)m(wing)h(pap)s(er)d(giv)m(es)j(v)-5 b(aluable)29 -b(additional)g(insigh)m(ts)g(in)m(to)h(the)f(algorithm,)h(but)e(is)h 
-(not)g(imme-)150 2122 y(diately)i(the)g(basis)f(of)h(an)m(y)f(co)s(de)h -(used)e(in)h(bzip2.)390 2273 y Fi(Peter)46 b(Fenwick:)533 -2377 y(Block)h(Sorting)e(Text)i(Compression)533 2481 -y(Proceedings)e(of)i(the)g(19th)g(Australasian)d(Computer)i(Science)f -(Conference,)629 2584 y(Melbourne,)g(Australia.)92 b(Jan)47 -b(31)g(-)h(Feb)f(2,)g(1996.)533 2688 y(ftp://ftp.cs.auckland.ac.)o -(nz/p)o(ub/)o(pete)o(r-f/)o(ACS)o(C96p)o(aper)o(.ps)150 -2845 y Fj(Kunihik)m(o)30 b(Sadak)-5 b(ane's)30 b(sorting)i(algorithm,)f -(men)m(tioned)g(ab)s(o)m(v)m(e,)h(is)e(a)m(v)-5 b(ailable)32 -b(from:)390 2996 y Fi(http://naomi.is.s.u-toky)o(o.ac)o(.jp/)o(~sa)o -(da/p)o(aper)o(s/S)o(ada9)o(8b.p)o(s.g)o(z)150 3153 y -Fj(The)38 b(Man)m(b)s(er-My)m(ers)h(su\016x)e(arra)m(y)i(construction)g -(algorithm)h(is)e(describ)s(ed)f(in)h(a)g(pap)s(er)f(a)m(v)-5 -b(ailable)150 3262 y(from:)390 3413 y Fi(http://www.cs.arizona.ed)o -(u/pe)o(ople)o(/ge)o(ne/P)o(APER)o(S/s)o(uffi)o(x.ps)150 -3570 y Fj(Finally)d(,)33 b(the)e(follo)m(wing)i(pap)s(er)d(do)s(cumen)m -(ts)h(some)g(recen)m(t)i(in)m(v)m(estigations)h(I)d(made)g(in)m(to)i -(the)e(p)s(erfor-)150 3680 y(mance)g(of)f(sorting)h(algorithms:)390 -3831 y Fi(Julian)46 b(Seward:)533 3935 y(On)h(the)g(Performance)e(of)i -(BWT)g(Sorting)f(Algorithms)533 4038 y(Proceedings)f(of)i(the)g(IEEE)g -(Data)f(Compression)f(Conference)g(2000)629 4142 y(Snowbird,)g(Utah.)94 -b(28-30)46 b(March)h(2000.)p eop -%%Page: -1 39 --1 38 bop 3725 -116 a Fl(i)150 299 y Fg(T)-13 b(able)54 -b(of)g(Con)l(ten)l(ts)150 641 y Fk(1)135 b(In)l(tro)t(duction)15 -b Fa(.)20 b(.)f(.)h(.)f(.)g(.)h(.)f(.)h(.)f(.)h(.)f(.)g(.)h(.)f(.)h(.)f -(.)h(.)f(.)g(.)h(.)f(.)h(.)f(.)h(.)f(.)g(.)h(.)f(.)h(.)f(.)60 -b Fk(2)150 911 y(2)135 b(Ho)l(w)45 b(to)h(use)f Fc(bzip2)31 -b Fa(.)19 b(.)g(.)h(.)f(.)h(.)f(.)h(.)f(.)g(.)h(.)f(.)h(.)f(.)h(.)f(.)g -(.)h(.)f(.)h(.)f(.)h(.)f(.)g(.)h(.)f(.)78 b Fk(3)1047 -1048 y Fj(NAME)18 b Fb(.)d(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g 
-(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.) -g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)48 b Fl(3)1047 -1157 y Fj(SYNOPSIS)18 b Fb(.)c(.)h(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.) -g(.)g(.)g(.)g(.)g(.)g(.)g(.)48 b Fl(3)1047 1267 y Fj(DESCRIPTION)28 -b Fb(.)15 b(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)59 -b Fl(3)1047 1377 y Fj(OPTIONS)16 b Fb(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.) -g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)46 b Fl(4)1047 -1486 y Fj(MEMOR)-8 b(Y)31 b(MANA)m(GEMENT)9 b Fb(.)16 -b(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)39 b Fl(6)1047 1596 y Fj(RECO)m(VERING)30 b(D)m(A)-8 -b(T)g(A)31 b(FR)m(OM)g(D)m(AMA)m(GED)g(FILES)1256 1705 -y Fb(.)15 b(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.) -g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)57 b Fl(7)1047 1815 y -Fj(PERF)m(ORMANCE)30 b(NOTES)10 b Fb(.)15 b(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)40 -b Fl(8)1047 1924 y Fj(CA)-10 b(VEA)i(TS)12 b Fb(.)h(.)i(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.) -g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)42 -b Fl(8)1047 2034 y Fj(A)m(UTHOR)22 b Fb(.)15 b(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.) -g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)52 b -Fl(8)150 2276 y Fk(3)135 b(Programming)46 b(with)f Fc(libbzip2)27 -b Fa(.)16 b(.)j(.)g(.)h(.)f(.)h(.)f(.)h(.)f(.)h(.)f(.)g(.)h(.)f(.)72 -b Fk(10)449 2413 y Fj(3.1)92 b(T)-8 b(op-lev)m(el)33 -b(structure)18 b Fb(.)d(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.) 
-g(.)g(.)g(.)g(.)g(.)g(.)47 b Fl(10)748 2523 y Fj(3.1.1)93 -b(Lo)m(w-lev)m(el)32 b(summary)20 b Fb(.)15 b(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.) -g(.)g(.)g(.)50 b Fl(10)748 2633 y Fj(3.1.2)93 b(High-lev)m(el)32 -b(summary)27 b Fb(.)15 b(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.) -g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)57 -b Fl(10)748 2742 y Fj(3.1.3)93 b(Utilit)m(y)32 b(functions)e(summary)10 -b Fb(.)k(.)h(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.) -g(.)g(.)g(.)g(.)g(.)39 b Fl(11)449 2852 y Fj(3.2)92 b(Error)30 -b(handling)15 b Fb(.)f(.)h(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.) -g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)44 b Fl(11)449 -2961 y Fj(3.3)92 b(Lo)m(w-lev)m(el)32 b(in)m(terface)26 -b Fb(.)15 b(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.) -g(.)g(.)g(.)53 b Fl(13)748 3071 y Fj(3.3.1)93 b Fi(BZ2_bzCompressInit) -21 b Fb(.)9 b(.)15 b(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.) -g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)50 b Fl(13)748 -3181 y Fj(3.3.2)93 b Fi(BZ2_bzCompress)9 b Fb(.)h(.)15 -b(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)38 b Fl(15)748 -3290 y Fj(3.3.3)93 b Fi(BZ2_bzCompressEnd)23 b Fb(.)10 -b(.)15 b(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.) -g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)52 b Fl(18)748 3400 -y Fj(3.3.4)93 b Fi(BZ2_bzDecompressInit)16 b Fb(.)9 b(.)15 -b(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)45 b Fl(18)748 3509 y Fj(3.3.5)93 -b Fi(BZ2_bzDecompress)21 b Fb(.)15 b(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.) 
-g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)55 b Fl(18)748 3619 y Fj(3.3.6)93 b Fi(BZ2_bzDecompressEnd)18 -b Fb(.)10 b(.)15 b(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)48 b Fl(20)449 -3729 y Fj(3.4)92 b(High-lev)m(el)33 b(in)m(terface)9 -b Fb(.)17 b(.)e(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.) -g(.)g(.)g(.)38 b Fl(20)748 3838 y Fj(3.4.1)93 b Fi(BZ2_bzReadOpen)9 -b Fb(.)h(.)15 b(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)38 -b Fl(20)748 3948 y Fj(3.4.2)93 b Fi(BZ2_bzRead)18 b Fb(.)12 -b(.)j(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)47 -b Fl(21)748 4057 y Fj(3.4.3)93 b Fi(BZ2_bzReadGetUnused)18 -b Fb(.)10 b(.)15 b(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)48 b Fl(23)748 -4167 y Fj(3.4.4)93 b Fi(BZ2_bzReadClose)23 b Fb(.)15 -b(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)57 b Fl(23)748 -4276 y Fj(3.4.5)93 b Fi(BZ2_bzWriteOpen)23 b Fb(.)15 -b(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)57 b Fl(23)748 -4386 y Fj(3.4.6)93 b Fi(BZ2_bzWrite)16 b Fb(.)11 b(.)k(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.) 
-g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)45 b Fl(24)748 -4496 y Fj(3.4.7)93 b Fi(BZ2_bzWriteClose)21 b Fb(.)15 -b(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)55 b Fl(25)748 -4605 y Fj(3.4.8)93 b(Handling)30 b(em)m(b)s(edded)f(compressed)h(data)h -(streams)9 b Fb(.)15 b(.)g(.)g(.)38 b Fl(25)748 4715 -y Fj(3.4.9)93 b(Standard)29 b(\014le-reading/writing)j(co)s(de)16 -b Fb(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)45 -b Fl(26)449 4824 y Fj(3.5)92 b(Utilit)m(y)32 b(functions)c -Fb(.)15 b(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.) -g(.)g(.)g(.)g(.)h(.)57 b Fl(27)748 4934 y Fj(3.5.1)93 -b Fi(BZ2_bzBuffToBuffCompres)o(s)22 b Fb(.)15 b(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)57 b Fl(27)748 -5044 y Fj(3.5.2)93 b Fi(BZ2_bzBuffToBuffDecompr)o(ess)17 -b Fb(.)e(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.) -53 b Fl(28)449 5153 y Fj(3.6)92 b Fi(zlib)29 b Fj(compatibilit)m(y)k -(functions)23 b Fb(.)15 b(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)52 -b Fl(29)449 5263 y Fj(3.7)92 b(Using)30 b(the)h(library)f(in)g(a)h -Fi(stdio)p Fj(-free)f(en)m(vironmen)m(t)12 b Fb(.)k(.)f(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)41 b Fl(30)p eop -%%Page: -2 40 --2 39 bop 3699 -116 a Fl(ii)748 83 y Fj(3.7.1)93 b(Getting)32 -b(rid)e(of)g Fi(stdio)17 b Fb(.)d(.)h(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)46 -b Fl(30)748 193 y Fj(3.7.2)93 b(Critical)31 b(error)g(handling)18 -b Fb(.)c(.)h(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.) -g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)47 b Fl(30)449 302 y -Fj(3.8)92 b(Making)31 b(a)f(Windo)m(ws)g(DLL)17 b Fb(.)d(.)h(.)g(.)g(.) 
-g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)46 b Fl(31)150 -545 y Fk(4)135 b(Miscellanea)11 b Fa(.)21 b(.)f(.)f(.)h(.)f(.)g(.)h(.)f -(.)h(.)f(.)h(.)f(.)g(.)h(.)f(.)h(.)f(.)h(.)f(.)g(.)h(.)f(.)h(.)f(.)h(.) -f(.)h(.)f(.)g(.)h(.)56 b Fk(32)449 682 y Fj(4.1)92 b(Limitations)31 -b(of)g(the)g(compressed)f(\014le)g(format)c Fb(.)15 b(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)54 b Fl(32)449 -791 y Fj(4.2)92 b(P)m(ortabilit)m(y)33 b(issues)12 b -Fb(.)j(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.) -g(.)g(.)g(.)g(.)41 b Fl(33)449 901 y Fj(4.3)92 b(Rep)s(orting)31 -b(bugs)24 b Fb(.)15 b(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.) -g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)54 b Fl(33)449 1010 -y Fj(4.4)92 b(Did)30 b(y)m(ou)g(get)i(the)f(righ)m(t)g(pac)m(k)-5 -b(age?)18 b Fb(.)f(.)e(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)47 b Fl(35)449 -1120 y Fj(4.5)92 b(T)-8 b(esting)16 b Fb(.)h(.)e(.)g(.)g(.)g(.)g(.)g(.) -g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)h(.)f(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.) -g(.)g(.)g(.)g(.)g(.)46 b Fl(35)449 1230 y Fj(4.6)92 b(F)-8 -b(urther)30 b(reading)17 b Fb(.)f(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g -(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.) 
-g(.)g(.)h(.)f(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)g(.)46 b -Fl(36)p eop -%%Trailer -end -userdict /end-hook known{end-hook}if -%%EOF diff --git a/manual.texi b/manual.texi deleted file mode 100644 index 5bc27d5..0000000 --- a/manual.texi +++ /dev/null @@ -1,2243 +0,0 @@ -\input texinfo @c -*- Texinfo -*- -@setfilename bzip2.info - -@ignore -This file documents bzip2 version 1.0.2, and associated library -libbzip2, written by Julian Seward (jseward@acm.org). - -Copyright (C) 1996-2002 Julian R Seward - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for verbatim copies. -@end ignore - -@ifinfo -@format -START-INFO-DIR-ENTRY -* Bzip2: (bzip2). A program and library for data compression. -END-INFO-DIR-ENTRY -@end format - -@end ifinfo - -@iftex -@c @finalout -@settitle bzip2 and libbzip2 -@titlepage -@title bzip2 and libbzip2 -@subtitle a program and library for data compression -@subtitle copyright (C) 1996-2002 Julian Seward -@subtitle version 1.0.2 of 30 December 2001 -@author Julian Seward - -@end titlepage - -@parindent 0mm -@parskip 2mm - -@end iftex -@node Top,,, (dir) - -The following text is the License for this software. You should -find it identical to that contained in the file LICENSE in the -source distribution. - -@bf{------------------ START OF THE LICENSE ------------------} - -This program, @code{bzip2}, -and associated library @code{libbzip2}, are -Copyright (C) 1996-2002 Julian R Seward. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -@itemize @bullet -@item - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
-@item - The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment in the product - documentation would be appreciated but is not required. -@item - Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. -@item - The name of the author may not be used to endorse or promote - products derived from this software without specific prior written - permission. -@end itemize -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE -GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Julian Seward, Cambridge, UK. - -@code{jseward@@acm.org} - -@code{bzip2}/@code{libbzip2} version 1.0.2 of 30 December 2001. - -@bf{------------------ END OF THE LICENSE ------------------} - -Web sites: - -@code{http://sources.redhat.com/bzip2} - -@code{http://www.cacheprof.org} - -PATENTS: To the best of my knowledge, @code{bzip2} does not use any patented -algorithms. However, I do not have the resources available to carry out -a full patent search. Therefore I cannot give any guarantee of the -above statement. - - - - - - - -@chapter Introduction - -@code{bzip2} compresses files using the Burrows-Wheeler -block-sorting text compression algorithm, and Huffman coding. 
-Compression is generally considerably better than that -achieved by more conventional LZ77/LZ78-based compressors, -and approaches the performance of the PPM family of statistical compressors. - -@code{bzip2} is built on top of @code{libbzip2}, a flexible library -for handling compressed data in the @code{bzip2} format. This manual -describes both how to use the program and -how to work with the library interface. Most of the -manual is devoted to this library, not the program, -which is good news if your interest is only in the program. - -Chapter 2 describes how to use @code{bzip2}; this is the only part -you need to read if you just want to know how to operate the program. -Chapter 3 describes the programming interfaces in detail, and -Chapter 4 records some miscellaneous notes which I thought -ought to be recorded somewhere. - - -@chapter How to use @code{bzip2} - -This chapter contains a copy of the @code{bzip2} man page, -and nothing else. - -@quotation - -@unnumberedsubsubsec NAME -@itemize -@item @code{bzip2}, @code{bunzip2} -- a block-sorting file compressor, v1.0.2 -@item @code{bzcat} -- decompresses files to stdout -@item @code{bzip2recover} -- recovers data from damaged bzip2 files -@end itemize - -@unnumberedsubsubsec SYNOPSIS -@itemize -@item @code{bzip2} [ -cdfkqstvzVL123456789 ] [ filenames ... ] -@item @code{bunzip2} [ -fkvsVL ] [ filenames ... ] -@item @code{bzcat} [ -s ] [ filenames ... ] -@item @code{bzip2recover} filename -@end itemize - -@unnumberedsubsubsec DESCRIPTION - -@code{bzip2} compresses files using the Burrows-Wheeler block sorting -text compression algorithm, and Huffman coding. Compression is -generally considerably better than that achieved by more conventional -LZ77/LZ78-based compressors, and approaches the performance of the PPM -family of statistical compressors. - -The command-line options are deliberately very similar to those of GNU -@code{gzip}, but they are not identical. 
- -@code{bzip2} expects a list of file names to accompany the command-line -flags. Each file is replaced by a compressed version of itself, with -the name @code{original_name.bz2}. Each compressed file has the same -modification date, permissions, and, when possible, ownership as the -corresponding original, so that these properties can be correctly -restored at decompression time. File name handling is naive in the -sense that there is no mechanism for preserving original file names, -permissions, ownerships or dates in filesystems which lack these -concepts, or have serious file name length restrictions, such as MS-DOS. - -@code{bzip2} and @code{bunzip2} will by default not overwrite existing -files. If you want this to happen, specify the @code{-f} flag. - -If no file names are specified, @code{bzip2} compresses from standard -input to standard output. In this case, @code{bzip2} will decline to -write compressed output to a terminal, as this would be entirely -incomprehensible and therefore pointless. - -@code{bunzip2} (or @code{bzip2 -d}) decompresses all -specified files. Files which were not created by @code{bzip2} -will be detected and ignored, and a warning issued. -@code{bzip2} attempts to guess the filename for the decompressed file -from that of the compressed file as follows: -@itemize -@item @code{filename.bz2 } becomes @code{filename} -@item @code{filename.bz } becomes @code{filename} -@item @code{filename.tbz2} becomes @code{filename.tar} -@item @code{filename.tbz } becomes @code{filename.tar} -@item @code{anyothername } becomes @code{anyothername.out} -@end itemize -If the file does not end in one of the recognised endings, -@code{.bz2}, @code{.bz}, -@code{.tbz2} or @code{.tbz}, @code{bzip2} complains that it cannot -guess the name of the original file, and uses the original name -with @code{.out} appended. - -As with compression, supplying no -filenames causes decompression from standard input to standard output. 
- -@code{bunzip2} will correctly decompress a file which is the -concatenation of two or more compressed files. The result is the -concatenation of the corresponding uncompressed files. Integrity -testing (@code{-t}) of concatenated compressed files is also supported. - -You can also compress or decompress files to the standard output by -giving the @code{-c} flag. Multiple files may be compressed and -decompressed like this. The resulting outputs are fed sequentially to -stdout. Compression of multiple files in this manner generates a stream -containing multiple compressed file representations. Such a stream -can be decompressed correctly only by @code{bzip2} version 0.9.0 or -later. Earlier versions of @code{bzip2} will stop after decompressing -the first file in the stream. - -@code{bzcat} (or @code{bzip2 -dc}) decompresses all specified files to -the standard output. - -@code{bzip2} will read arguments from the environment variables -@code{BZIP2} and @code{BZIP}, in that order, and will process them -before any arguments read from the command line. This gives a -convenient way to supply default arguments. - -Compression is always performed, even if the compressed file is slightly -larger than the original. Files of less than about one hundred bytes -tend to get larger, since the compression mechanism has a constant -overhead in the region of 50 bytes. Random data (including the output -of most file compressors) is coded at about 8.05 bits per byte, giving -an expansion of around 0.5%. - -As a self-check for your protection, @code{bzip2} uses 32-bit CRCs to -make sure that the decompressed version of a file is identical to the -original. This guards against corruption of the compressed data, and -against undetected bugs in @code{bzip2} (hopefully very unlikely). The -chance of data corruption going undetected is microscopic, about one -chance in four billion for each file processed.
Be aware, though, that -the check occurs upon decompression, so it can only tell you that -something is wrong. It can't help you recover the original uncompressed -data. You can use @code{bzip2recover} to try to recover data from -damaged files. - -Return values: 0 for a normal exit, 1 for environmental problems (file -not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt -compressed file, 3 for an internal consistency error (eg, bug) which -caused @code{bzip2} to panic. - - -@unnumberedsubsubsec OPTIONS -@table @code -@item -c --stdout -Compress or decompress to standard output. -@item -d --decompress -Force decompression. @code{bzip2}, @code{bunzip2} and @code{bzcat} are -really the same program, and the decision about what actions to take is -done on the basis of which name is used. This flag overrides that -mechanism, and forces bzip2 to decompress. -@item -z --compress -The complement to @code{-d}: forces compression, regardless of the -invocation name. -@item -t --test -Check integrity of the specified file(s), but don't decompress them. -This really performs a trial decompression and throws away the result. -@item -f --force -Force overwrite of output files. Normally, @code{bzip2} will not overwrite -existing output files. Also forces @code{bzip2} to break hard links -to files, which it otherwise wouldn't do. - -@code{bzip2} normally declines to decompress files which don't have the -correct magic header bytes. If forced (@code{-f}), however, it will -pass such files through unmodified. This is how GNU @code{gzip} -behaves. -@item -k --keep -Keep (don't delete) input files during compression -or decompression. -@item -s --small -Reduce memory usage, for compression, decompression and testing. Files -are decompressed and tested using a modified algorithm which only -requires 2.5 bytes per block byte. This means any file can be -decompressed in 2300k of memory, albeit at about half the normal speed.
- -During compression, @code{-s} selects a block size of 200k, which limits -memory use to around the same figure, at the expense of your compression -ratio. In short, if your machine is low on memory (8 megabytes or -less), use -s for everything. See MEMORY MANAGEMENT below. -@item -q --quiet -Suppress non-essential warning messages. Messages pertaining to -I/O errors and other critical events will not be suppressed. -@item -v --verbose -Verbose mode -- show the compression ratio for each file processed. -Further @code{-v}'s increase the verbosity level, spewing out lots of -information which is primarily of interest for diagnostic purposes. -@item -L --license -V --version -Display the software version, license terms and conditions. -@item -1 (or --fast) to -9 (or --best) -Set the block size to 100 k, 200 k .. 900 k when compressing. Has no -effect when decompressing. See MEMORY MANAGEMENT below. -The @code{--fast} and @code{--best} aliases are primarily for GNU -@code{gzip} compatibility. In particular, @code{--fast} doesn't make -things significantly faster. And @code{--best} merely selects the -default behaviour. -@item -- -Treats all subsequent arguments as file names, even if they start -with a dash. This is so you can handle files with names beginning -with a dash, for example: @code{bzip2 -- -myfilename}. -@item --repetitive-fast -@item --repetitive-best -These flags are redundant in versions 0.9.5 and above. They provided -some coarse control over the behaviour of the sorting algorithm in -earlier versions, which was sometimes useful. 0.9.5 and above have an -improved algorithm which renders these flags irrelevant. -@end table - - -@unnumberedsubsubsec MEMORY MANAGEMENT - -@code{bzip2} compresses large files in blocks. The block size affects -both the compression ratio achieved, and the amount of memory needed for -compression and decompression. 
The flags @code{-1} through @code{-9} -specify the block size to be 100,000 bytes through 900,000 bytes (the -default) respectively. At decompression time, the block size used for -compression is read from the header of the compressed file, and -@code{bunzip2} then allocates itself just enough memory to decompress -the file. Since block sizes are stored in compressed files, it follows -that the flags @code{-1} to @code{-9} are irrelevant to and so ignored -during decompression. - -Compression and decompression requirements, in bytes, can be estimated -as: -@example - Compression: 400k + ( 8 x block size ) - - Decompression: 100k + ( 4 x block size ), or - 100k + ( 2.5 x block size ) -@end example -Larger block sizes give rapidly diminishing marginal returns. Most of -the compression comes from the first two or three hundred k of block -size, a fact worth bearing in mind when using @code{bzip2} on small machines. -It is also important to appreciate that the decompression memory -requirement is set at compression time by the choice of block size. - -For files compressed with the default 900k block size, @code{bunzip2} -will require about 3700 kbytes to decompress. To support decompression -of any file on a 4 megabyte machine, @code{bunzip2} has an option to -decompress using approximately half this amount of memory, about 2300 -kbytes. Decompression speed is also halved, so you should use this -option only where necessary. The relevant flag is @code{-s}. - -In general, try and use the largest block size memory constraints allow, -since that maximises the compression achieved. Compression and -decompression speed are virtually unaffected by block size. - -Another significant point applies to files which fit in a single block --- that means most files you'd encounter using a large block size. The -amount of real memory touched is proportional to the size of the file, -since the file is smaller than a block. 
For example, compressing a file -20,000 bytes long with the flag @code{-9} will cause the compressor to -allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560 -kbytes of it. Similarly, the decompressor will allocate 3700k but only -touch 100k + 20000 * 4 = 180 kbytes. - -Here is a table which summarises the maximum memory usage for different -block sizes. Also recorded is the total compressed size for 14 files of -the Calgary Text Compression Corpus totalling 3,141,622 bytes. This -column gives some feel for how compression varies with block size. -These figures tend to understate the advantage of larger block sizes for -larger files, since the Corpus is dominated by smaller files. -@example - Compress Decompress Decompress Corpus - Flag usage usage -s usage Size - - -1 1200k 500k 350k 914704 - -2 2000k 900k 600k 877703 - -3 2800k 1300k 850k 860338 - -4 3600k 1700k 1100k 846899 - -5 4400k 2100k 1350k 845160 - -6 5200k 2500k 1600k 838626 - -7 6100k 2900k 1850k 834096 - -8 6800k 3300k 2100k 828642 - -9 7600k 3700k 2350k 828642 -@end example - -@unnumberedsubsubsec RECOVERING DATA FROM DAMAGED FILES - -@code{bzip2} compresses files in blocks, usually 900kbytes long. Each -block is handled independently. If a media or transmission error causes -a multi-block @code{.bz2} file to become damaged, it may be possible to -recover data from the undamaged blocks in the file. - -The compressed representation of each block is delimited by a 48-bit -pattern, which makes it possible to find the block boundaries with -reasonable certainty. Each block also carries its own 32-bit CRC, so -damaged blocks can be distinguished from undamaged ones. - -@code{bzip2recover} is a simple program whose purpose is to search for -blocks in @code{.bz2} files, and write each block out into its own -@code{.bz2} file. You can then use @code{bzip2 -t} to test the -integrity of the resulting files, and decompress those which are -undamaged. 
- -@code{bzip2recover} -takes a single argument, the name of the damaged file, and writes a -number of files @code{rec00001file.bz2}, @code{rec00002file.bz2}, etc, -containing the extracted blocks. The output filenames are designed so -that the use of wildcards in subsequent processing -- for example, -@code{bzip2 -dc rec*file.bz2 > recovered_data} -- processes the files in -the correct order. - -@code{bzip2recover} should be of most use dealing with large @code{.bz2} -files, as these will contain many blocks. It is clearly futile to use -it on damaged single-block files, since a damaged block cannot be -recovered. If you wish to minimise any potential data loss through -media or transmission errors, you might consider compressing with a -smaller block size. - - -@unnumberedsubsubsec PERFORMANCE NOTES - -The sorting phase of compression gathers together similar strings in the -file. Because of this, files containing very long runs of repeated -symbols, like "aabaabaabaab ..." (repeated several hundred times) may -compress more slowly than normal. Versions 0.9.5 and above fare much -better than previous versions in this respect. The ratio between -worst-case and average-case compression time is in the region of 10:1. -For previous versions, this figure was more like 100:1. You can use the -@code{-vvvv} option to monitor progress in great detail, if you want. - -Decompression speed is unaffected by these phenomena. - -@code{bzip2} usually allocates several megabytes of memory to operate -in, and then charges all over it in a fairly random fashion. This means -that performance, both for compressing and decompressing, is largely -determined by the speed at which your machine can service cache misses. -Because of this, small changes to the code to reduce the miss rate have -been observed to give disproportionately large performance improvements. -I imagine @code{bzip2} will perform best on machines with very large -caches. 
- - -@unnumberedsubsubsec CAVEATS - -I/O error messages are not as helpful as they could be. @code{bzip2} -tries hard to detect I/O errors and exit cleanly, but the details of -what the problem is sometimes seem rather misleading. - -This manual page pertains to version 1.0.2 of @code{bzip2}. Compressed -data created by this version is entirely forwards and backwards -compatible with the previous public releases, versions 0.1pl2, 0.9.0, -0.9.5, 1.0.0 and 1.0.1, but with the following exception: 0.9.0 and -above can correctly decompress multiple concatenated compressed files. -0.1pl2 cannot do this; it will stop after decompressing just the first -file in the stream. - -@code{bzip2recover} versions prior to this one, 1.0.2, used 32-bit -integers to represent bit positions in compressed files, so it could not -handle compressed files more than 512 megabytes long. Version 1.0.2 and -above uses 64-bit ints on some platforms which support them (GNU -supported targets, and Windows). To establish whether or not -@code{bzip2recover} was built with such a limitation, run it without -arguments. In any event you can build yourself an unlimited version if -you can recompile it with @code{MaybeUInt64} set to be an unsigned -64-bit integer. - - - -@unnumberedsubsubsec AUTHOR -Julian Seward, @code{jseward@@acm.org}. - -@code{http://sources.redhat.com/bzip2} - -The ideas embodied in @code{bzip2} are due to (at least) the following -people: Michael Burrows and David Wheeler (for the block sorting -transformation), David Wheeler (again, for the Huffman coder), Peter -Fenwick (for the structured coding model in the original @code{bzip}, -and many refinements), and Alistair Moffat, Radford Neal and Ian Witten -(for the arithmetic coder in the original @code{bzip}). I am much -indebted for their help, support and advice. See the manual in the -source distribution for pointers to sources of documentation. 
Christian -von Roques encouraged me to look for faster sorting algorithms, so as to -speed up compression. Bela Lubkin encouraged me to improve the -worst-case compression performance. The @code{bz*} scripts are derived -from those of GNU @code{gzip}. Many people sent patches, helped with -portability problems, lent machines, gave advice and were generally -helpful. - -@end quotation - - - - -@chapter Programming with @code{libbzip2} - -This chapter describes the programming interface to @code{libbzip2}. - -For general background information, particularly about memory -use and performance aspects, you'd be well advised to read Chapter 2 -as well. - -@section Top-level structure - -@code{libbzip2} is a flexible library for compressing and decompressing -data in the @code{bzip2} data format. Although packaged as a single -entity, it helps to regard the library as three separate parts: the low -level interface, and the high level interface, and some utility -functions. - -The structure of @code{libbzip2}'s interfaces is similar to -that of Jean-loup Gailly's and Mark Adler's excellent @code{zlib} -library. - -All externally visible symbols have names beginning @code{BZ2_}. -This is new in version 1.0. The intention is to minimise pollution -of the namespaces of library clients. - -@subsection Low-level summary - -This interface provides services for compressing and decompressing -data in memory. There's no provision for dealing with files, streams -or any other I/O mechanisms, just straight memory-to-memory work. -In fact, this part of the library can be compiled without inclusion -of @code{stdio.h}, which may be helpful for embedded applications. - -The low-level part of the library has no global variables and -is therefore thread-safe. 
- -Six routines make up the low level interface: -@code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, and @* @code{BZ2_bzCompressEnd} -for compression, -and a corresponding trio @code{BZ2_bzDecompressInit}, @* @code{BZ2_bzDecompress} -and @code{BZ2_bzDecompressEnd} for decompression. -The @code{*Init} functions allocate -memory for compression/decompression and do other -initialisations, whilst the @code{*End} functions close down operations -and release memory. - -The real work is done by @code{BZ2_bzCompress} and @code{BZ2_bzDecompress}. -These compress and decompress data from a user-supplied input buffer -to a user-supplied output buffer. These buffers can be any size; -arbitrary quantities of data are handled by making repeated calls -to these functions. This is a flexible mechanism allowing a -consumer-pull style of activity, or producer-push, or a mixture of -both. - - - -@subsection High-level summary - -This interface provides some handy wrappers around the low-level -interface to facilitate reading and writing @code{bzip2} format -files (@code{.bz2} files). The routines provide hooks to facilitate -reading files in which the @code{bzip2} data stream is embedded -within some larger-scale file structure, or where there are -multiple @code{bzip2} data streams concatenated end-to-end. - -For reading files, @code{BZ2_bzReadOpen}, @code{BZ2_bzRead}, -@code{BZ2_bzReadClose} and @* @code{BZ2_bzReadGetUnused} are supplied. For -writing files, @code{BZ2_bzWriteOpen}, @code{BZ2_bzWrite} and -@code{BZ2_bzWriteFinish} are available. - -As with the low-level library, no global variables are used -so the library is per se thread-safe. However, if I/O errors -occur whilst reading or writing the underlying compressed files, -you may have to consult @code{errno} to determine the cause of -the error. In that case, you'd need a C library which correctly -supports @code{errno} in a multithreaded environment. 
- -To make the library a little simpler and more portable, -@code{BZ2_bzReadOpen} and @code{BZ2_bzWriteOpen} require you to pass them file -handles (@code{FILE*}s) which have previously been opened for reading or -writing respectively. That avoids portability problems associated with -file operations and file attributes, whilst not being much of an -imposition on the programmer. - - - -@subsection Utility functions summary -For very simple needs, @code{BZ2_bzBuffToBuffCompress} and -@code{BZ2_bzBuffToBuffDecompress} are provided. These compress -data in memory from one buffer to another buffer in a single -function call. You should assess whether these functions -fulfill your memory-to-memory compression/decompression -requirements before investing effort in understanding the more -general but more complex low-level interface. - -Yoshioka Tsuneo (@code{QWF00133@@niftyserve.or.jp} / -@code{tsuneo-y@@is.aist-nara.ac.jp}) has contributed some functions to -give better @code{zlib} compatibility. These functions are -@code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush}, -@code{BZ2_bzclose}, -@code{BZ2_bzerror} and @code{BZ2_bzlibVersion}. You may find these functions -more convenient for simple file reading and writing, than those in the -high-level interface. These functions are not (yet) officially part of -the library, and are minimally documented here. If they break, you -get to keep all the pieces. I hope to document them properly when time -permits. - -Yoshioka also contributed modifications to allow the library to be -built as a Windows DLL. - - -@section Error handling - -The library is designed to recover cleanly in all situations, including -the worst-case situation of decompressing random data. I'm not -100% sure that it can always do this, so you might want to add -a signal handler to catch segmentation violations during decompression -if you are feeling especially paranoid. 
I would be interested in -hearing more about the robustness of the library to corrupted -compressed data. - -Version 1.0 is much more robust in this respect than -0.9.0 or 0.9.5. Investigations with Checker (a tool for -detecting problems with memory management, similar to Purify) -indicate that, at least for the few files I tested, all single-bit -errors in the decompressed data are caught properly, with no -segmentation faults, no reads of uninitialised data and no -out of range reads or writes. So it's certainly much improved, -although I wouldn't claim it to be totally bombproof. - -The file @code{bzlib.h} contains all definitions needed to use -the library. In particular, you should definitely not include -@code{bzlib_private.h}. - -In @code{bzlib.h}, the various return values are defined. The following -list is not intended as an exhaustive description of the circumstances -in which a given value may be returned -- those descriptions are given -later. Rather, it is intended to convey the rough meaning of each -return value. The first five actions are normal and not intended to -denote an error situation. -@table @code -@item BZ_OK -The requested action was completed successfully. -@item BZ_RUN_OK -@itemx BZ_FLUSH_OK -@itemx BZ_FINISH_OK -In @code{BZ2_bzCompress}, the requested flush/finish/nothing-special action -was completed successfully. -@item BZ_STREAM_END -Compression of data was completed, or the logical stream end was -detected during decompression. -@end table - -The following return values indicate an error of some kind. -@table @code -@item BZ_CONFIG_ERROR -Indicates that the library has been improperly compiled on your -platform -- a major configuration error. Specifically, it means -that @code{sizeof(char)}, @code{sizeof(short)} and @code{sizeof(int)} -are not 1, 2 and 4 respectively, as they should be. 
Note that the -library should still work properly on 64-bit platforms which follow -the LP64 programming model -- that is, where @code{sizeof(long)} -and @code{sizeof(void*)} are 8. Under LP64, @code{sizeof(int)} is -still 4, so @code{libbzip2}, which doesn't use the @code{long} type, -is OK. -@item BZ_SEQUENCE_ERROR -When using the library, it is important to call the functions in the -correct sequence and with data structures (buffers etc) in the correct -states. @code{libbzip2} checks as much as it can to ensure this is -happening, and returns @code{BZ_SEQUENCE_ERROR} if not. Code which -complies precisely with the function semantics, as detailed below, -should never receive this value; such an event denotes buggy code -which you should investigate. -@item BZ_PARAM_ERROR -Returned when a parameter to a function call is out of range -or otherwise manifestly incorrect. As with @code{BZ_SEQUENCE_ERROR}, -this denotes a bug in the client code. The distinction between -@code{BZ_PARAM_ERROR} and @code{BZ_SEQUENCE_ERROR} is a bit hazy, but still worth -making. -@item BZ_MEM_ERROR -Returned when a request to allocate memory failed. Note that the -quantity of memory needed to decompress a stream cannot be determined -until the stream's header has been read. So @code{BZ2_bzDecompress} and -@code{BZ2_bzRead} may return @code{BZ_MEM_ERROR} even though some of -the compressed data has been read. The same is not true for -compression; once @code{BZ2_bzCompressInit} or @code{BZ2_bzWriteOpen} have -successfully completed, @code{BZ_MEM_ERROR} cannot occur. -@item BZ_DATA_ERROR -Returned when a data integrity error is detected during decompression. -Most importantly, this means when stored and computed CRCs for the -data do not match. This value is also returned upon detection of any -other anomaly in the compressed data. 
-@item BZ_DATA_ERROR_MAGIC -As a special case of @code{BZ_DATA_ERROR}, it is sometimes useful to -know when the compressed stream does not start with the correct -magic bytes (@code{'B' 'Z' 'h'}). -@item BZ_IO_ERROR -Returned by @code{BZ2_bzRead} and @code{BZ2_bzWrite} when there is an error -reading or writing in the compressed file, and by @code{BZ2_bzReadOpen} -and @code{BZ2_bzWriteOpen} for attempts to use a file for which the -error indicator (viz, @code{ferror(f)}) is set. -On receipt of @code{BZ_IO_ERROR}, the caller should consult -@code{errno} and/or @code{perror} to acquire operating-system -specific information about the problem. -@item BZ_UNEXPECTED_EOF -Returned by @code{BZ2_bzRead} when the compressed file finishes -before the logical end of stream is detected. -@item BZ_OUTBUFF_FULL -Returned by @code{BZ2_bzBuffToBuffCompress} and -@code{BZ2_bzBuffToBuffDecompress} to indicate that the output data -will not fit into the output buffer provided. -@end table - - - -@section Low-level interface - -@subsection @code{BZ2_bzCompressInit} -@example -typedef - struct @{ - char *next_in; - unsigned int avail_in; - unsigned int total_in_lo32; - unsigned int total_in_hi32; - - char *next_out; - unsigned int avail_out; - unsigned int total_out_lo32; - unsigned int total_out_hi32; - - void *state; - - void *(*bzalloc)(void *,int,int); - void (*bzfree)(void *,void *); - void *opaque; - @} - bz_stream; - -int BZ2_bzCompressInit ( bz_stream *strm, - int blockSize100k, - int verbosity, - int workFactor ); - -@end example - -Prepares for compression. The @code{bz_stream} structure -holds all data pertaining to the compression activity. -A @code{bz_stream} structure should be allocated and initialised -prior to the call. -The fields of @code{bz_stream} -comprise the entirety of the user-visible data. @code{state} -is a pointer to the private data structures required for compression. 
- -Custom memory allocators are supported, via fields @code{bzalloc}, -@code{bzfree}, -and @code{opaque}. The value -@code{opaque} is passed as the first argument to -all calls to @code{bzalloc} and @code{bzfree}, but is -otherwise ignored by the library. -The call @code{bzalloc ( opaque, n, m )} is expected to return a -pointer @code{p} to -@code{n * m} bytes of memory, and @code{bzfree ( opaque, p )} -should free -that memory. - -If you don't want to use a custom memory allocator, set @code{bzalloc}, -@code{bzfree} and -@code{opaque} to @code{NULL}, -and the library will then use the standard @code{malloc}/@code{free} -routines. - -Before calling @code{BZ2_bzCompressInit}, fields @code{bzalloc}, -@code{bzfree} and @code{opaque} should -be filled appropriately, as just described. Upon return, the internal -state will have been allocated and initialised, and @code{total_in_lo32}, -@code{total_in_hi32}, @code{total_out_lo32} and -@code{total_out_hi32} will have been set to zero. -These four fields are used by the library -to inform the caller of the total amount of data passed into and out of -the library, respectively. You should not try to change them. -As of version 1.0, 64-bit counts are maintained, even on 32-bit -platforms, using the @code{_hi32} fields to store the upper 32 bits -of the count. So, for example, the total amount of data in -is @code{(total_in_hi32 << 32) + total_in_lo32}. - -Parameter @code{blockSize100k} specifies the block size to be used for -compression. It should be a value between 1 and 9 inclusive, and the -actual block size used is 100000 x this figure. 9 gives the best -compression but takes most memory. - -Parameter @code{verbosity} should be set to a number between 0 and 4 -inclusive. 0 is silent, and greater numbers give increasingly verbose -monitoring/debugging output. If the library has been compiled with -@code{-DBZ_NO_STDIO}, no such output will appear for any verbosity -setting.
- -Parameter @code{workFactor} controls how the compression phase behaves -when presented with worst case, highly repetitive, input data. If -compression runs into difficulties caused by repetitive data, the -library switches from the standard sorting algorithm to a fallback -algorithm. The fallback is slower than the standard algorithm by -perhaps a factor of three, but always behaves reasonably, no matter how -bad the input. - -Lower values of @code{workFactor} reduce the amount of effort the -standard algorithm will expend before resorting to the fallback. You -should set this parameter carefully; too low, and many inputs will be -handled by the fallback algorithm and so compress rather slowly, too -high, and your average-to-worst case compression times can become very -large. The default value of 30 gives reasonable behaviour over a wide -range of circumstances. - -Allowable values range from 0 to 250 inclusive. 0 is a special case, -equivalent to using the default value of 30. - -Note that the compressed output generated is the same regardless of -whether or not the fallback algorithm is used. - -Be aware also that this parameter may disappear entirely in future -versions of the library. In principle it should be possible to devise a -good way to automatically choose which algorithm to use. Such a -mechanism would render the parameter obsolete. 
- -Possible return values: -@display - @code{BZ_CONFIG_ERROR} - if the library has been mis-compiled - @code{BZ_PARAM_ERROR} - if @code{strm} is @code{NULL} - or @code{blockSize} < 1 or @code{blockSize} > 9 - or @code{verbosity} < 0 or @code{verbosity} > 4 - or @code{workFactor} < 0 or @code{workFactor} > 250 - @code{BZ_MEM_ERROR} - if not enough memory is available - @code{BZ_OK} - otherwise -@end display -Allowable next actions: -@display - @code{BZ2_bzCompress} - if @code{BZ_OK} is returned - no specific action needed in case of error -@end display - -@subsection @code{BZ2_bzCompress} -@example - int BZ2_bzCompress ( bz_stream *strm, int action ); -@end example -Provides more input and/or output buffer space for the library. The -caller maintains input and output buffers, and calls @code{BZ2_bzCompress} to -transfer data between them. - -Before each call to @code{BZ2_bzCompress}, @code{next_in} should point at -the data to be compressed, and @code{avail_in} should indicate how many -bytes the library may read. @code{BZ2_bzCompress} updates @code{next_in}, -@code{avail_in} and @code{total_in} to reflect the number of bytes it -has read. - -Similarly, @code{next_out} should point to a buffer in which the -compressed data is to be placed, with @code{avail_out} indicating how -much output space is available. @code{BZ2_bzCompress} updates -@code{next_out}, @code{avail_out} and @code{total_out} to reflect the -number of bytes output. - -You may provide and remove as little or as much data as you like on each -call of @code{BZ2_bzCompress}. In the limit, it is acceptable to supply and -remove data one byte at a time, although this would be terribly -inefficient. You should always ensure that at least one byte of output -space is available at each call. - -A second purpose of @code{BZ2_bzCompress} is to request a change of mode of the -compressed stream. - -Conceptually, a compressed stream can be in one of four states: IDLE, -RUNNING, FLUSHING and FINISHING. 
Before initialisation -(@code{BZ2_bzCompressInit}) and after termination (@code{BZ2_bzCompressEnd}), a -stream is regarded as IDLE. - -Upon initialisation (@code{BZ2_bzCompressInit}), the stream is placed in the -RUNNING state. Subsequent calls to @code{BZ2_bzCompress} should pass -@code{BZ_RUN} as the requested action; other actions are illegal and -will result in @code{BZ_SEQUENCE_ERROR}. - -At some point, the calling program will have provided all the input data -it wants to. It will then want to finish up -- in effect, asking the -library to process any data it might have buffered internally. In this -state, @code{BZ2_bzCompress} will no longer attempt to read data from -@code{next_in}, but it will want to write data to @code{next_out}. -Because the output buffer supplied by the user can be arbitrarily small, -the finishing-up operation cannot necessarily be done with a single call -of @code{BZ2_bzCompress}. - -Instead, the calling program passes @code{BZ_FINISH} as an action to -@code{BZ2_bzCompress}. This changes the stream's state to FINISHING. Any -remaining input (ie, @code{next_in[0 .. avail_in-1]}) is compressed and -transferred to the output buffer. To do this, @code{BZ2_bzCompress} must be -called repeatedly until all the output has been consumed. At that -point, @code{BZ2_bzCompress} returns @code{BZ_STREAM_END}, and the stream's -state is set back to IDLE. @code{BZ2_bzCompressEnd} should then be -called. - -Just to make sure the calling program does not cheat, the library makes -a note of @code{avail_in} at the time of the first call to -@code{BZ2_bzCompress} which has @code{BZ_FINISH} as an action (ie, at the -time the program has announced its intention to not supply any more -input). By comparing this value with that of @code{avail_in} over -subsequent calls to @code{BZ2_bzCompress}, the library can detect any -attempts to slip in more data to compress. Any calls for which this is -detected will return @code{BZ_SEQUENCE_ERROR}. 
This indicates a -programming mistake which should be corrected. - -Instead of asking to finish, the calling program may ask -@code{BZ2_bzCompress} to take all the remaining input, compress it and -terminate the current (Burrows-Wheeler) compression block. This could -be useful for error control purposes. The mechanism is analogous to -that for finishing: call @code{BZ2_bzCompress} with an action of -@code{BZ_FLUSH}, remove output data, and persist with the -@code{BZ_FLUSH} action until the value @code{BZ_RUN_OK} is returned. As -with finishing, @code{BZ2_bzCompress} detects any attempt to provide more -input data once the flush has begun. - -Once the flush is complete, the stream returns to the normal RUNNING -state. - -This all sounds pretty complex, but isn't really. Here's a table -which shows which actions are allowable in each state, what action -will be taken, what the next state is, and what the non-error return -values are. Note that you can't explicitly ask what state the -stream is in, but nor do you need to -- it can be inferred from the -values returned by @code{BZ2_bzCompress}. -@display -IDLE/@code{any} - Illegal. IDLE state only exists after @code{BZ2_bzCompressEnd} or - before @code{BZ2_bzCompressInit}. - Return value = @code{BZ_SEQUENCE_ERROR} - -RUNNING/@code{BZ_RUN} - Compress from @code{next_in} to @code{next_out} as much as possible. - Next state = RUNNING - Return value = @code{BZ_RUN_OK} - -RUNNING/@code{BZ_FLUSH} - Remember current value of @code{next_in}. Compress from @code{next_in} - to @code{next_out} as much as possible, but do not accept any more input. - Next state = FLUSHING - Return value = @code{BZ_FLUSH_OK} - -RUNNING/@code{BZ_FINISH} - Remember current value of @code{next_in}. Compress from @code{next_in} - to @code{next_out} as much as possible, but do not accept any more input.
- Next state = FINISHING - Return value = @code{BZ_FINISH_OK} - -FLUSHING/@code{BZ_FLUSH} - Compress from @code{next_in} to @code{next_out} as much as possible, - but do not accept any more input. - If all the existing input has been used up and all compressed - output has been removed - Next state = RUNNING; Return value = @code{BZ_RUN_OK} - else - Next state = FLUSHING; Return value = @code{BZ_FLUSH_OK} - -FLUSHING/other - Illegal. - Return value = @code{BZ_SEQUENCE_ERROR} - -FINISHING/@code{BZ_FINISH} - Compress from @code{next_in} to @code{next_out} as much as possible, - but do not accept any more input. - If all the existing input has been used up and all compressed - output has been removed - Next state = IDLE; Return value = @code{BZ_STREAM_END} - else - Next state = FINISHING; Return value = @code{BZ_FINISH_OK} - -FINISHING/other - Illegal. - Return value = @code{BZ_SEQUENCE_ERROR} -@end display - -That still looks complicated? Well, fair enough. The usual sequence -of calls for compressing a load of data is: -@itemize @bullet -@item Get started with @code{BZ2_bzCompressInit}. -@item Shovel data in and shlurp out its compressed form using zero or more -calls of @code{BZ2_bzCompress} with action = @code{BZ_RUN}. -@item Finish up. -Repeatedly call @code{BZ2_bzCompress} with action = @code{BZ_FINISH}, -copying out the compressed output, until @code{BZ_STREAM_END} is returned. -@item Close up and go home. Call @code{BZ2_bzCompressEnd}. -@end itemize -If the data you want to compress fits into your input buffer all -at once, you can skip the calls of @code{BZ2_bzCompress ( ..., BZ_RUN )} and -just do the @code{BZ2_bzCompress ( ..., BZ_FINISH )} calls. - -All required memory is allocated by @code{BZ2_bzCompressInit}. The -compression library can accept any data at all (obviously). So you -shouldn't get any error return values from the @code{BZ2_bzCompress} calls. -If you do, they will be @code{BZ_SEQUENCE_ERROR}, and indicate a bug in -your programming.
- -Trivial other possible return values: -@display - @code{BZ_PARAM_ERROR} - if @code{strm} is @code{NULL}, or @code{strm->s} is @code{NULL} -@end display - -@subsection @code{BZ2_bzCompressEnd} -@example -int BZ2_bzCompressEnd ( bz_stream *strm ); -@end example -Releases all memory associated with a compression stream. - -Possible return values: -@display - @code{BZ_PARAM_ERROR} if @code{strm} is @code{NULL} or @code{strm->s} is @code{NULL} - @code{BZ_OK} otherwise -@end display - - -@subsection @code{BZ2_bzDecompressInit} -@example -int BZ2_bzDecompressInit ( bz_stream *strm, int verbosity, int small ); -@end example -Prepares for decompression. As with @code{BZ2_bzCompressInit}, a -@code{bz_stream} record should be allocated and initialised before the -call. Fields @code{bzalloc}, @code{bzfree} and @code{opaque} should be -set if a custom memory allocator is required, or made @code{NULL} for -the normal @code{malloc}/@code{free} routines. Upon return, the internal -state will have been initialised, and @code{total_in} and -@code{total_out} will be zero. - -For the meaning of parameter @code{verbosity}, see @code{BZ2_bzCompressInit}. - -If @code{small} is nonzero, the library will use an alternative -decompression algorithm which uses less memory but at the cost of -decompressing more slowly (roughly speaking, half the speed, but the -maximum memory requirement drops to around 2300k). See Chapter 2 for -more information on memory management. - -Note that the amount of memory needed to decompress -a stream cannot be determined until the stream's header has been read, -so even if @code{BZ2_bzDecompressInit} succeeds, a subsequent -@code{BZ2_bzDecompress} could fail with @code{BZ_MEM_ERROR}. 
- -Possible return values: -@display - @code{BZ_CONFIG_ERROR} - if the library has been mis-compiled - @code{BZ_PARAM_ERROR} - if @code{(small != 0 && small != 1)} - or @code{(verbosity < 0 || verbosity > 4)} - @code{BZ_MEM_ERROR} - if insufficient memory is available -@end display - -Allowable next actions: -@display - @code{BZ2_bzDecompress} - if @code{BZ_OK} was returned - no specific action required in case of error -@end display - - - -@subsection @code{BZ2_bzDecompress} -@example -int BZ2_bzDecompress ( bz_stream *strm ); -@end example -Provides more input and/or output buffer space for the library. The -caller maintains input and output buffers, and uses @code{BZ2_bzDecompress} -to transfer data between them. - -Before each call to @code{BZ2_bzDecompress}, @code{next_in} -should point at the compressed data, -and @code{avail_in} should indicate how many bytes the library -may read. @code{BZ2_bzDecompress} updates @code{next_in}, @code{avail_in} -and @code{total_in} -to reflect the number of bytes it has read. - -Similarly, @code{next_out} should point to a buffer in which the uncompressed -output is to be placed, with @code{avail_out} indicating how much output space -is available. @code{BZ2_bzDecompress} updates @code{next_out}, -@code{avail_out} and @code{total_out} to reflect -the number of bytes output. - -You may provide and remove as little or as much data as you like on -each call of @code{BZ2_bzDecompress}. -In the limit, it is acceptable to -supply and remove data one byte at a time, although this would be -terribly inefficient. You should always ensure that at least one -byte of output space is available at each call. - -Use of @code{BZ2_bzDecompress} is simpler than @code{BZ2_bzCompress}. - -You should provide input and remove output as described above, and -repeatedly call @code{BZ2_bzDecompress} until @code{BZ_STREAM_END} is -returned.
Appearance of @code{BZ_STREAM_END} denotes that -@code{BZ2_bzDecompress} has detected the logical end of the compressed -stream. @code{BZ2_bzDecompress} will not produce @code{BZ_STREAM_END} until -all output data has been placed into the output buffer, so once -@code{BZ_STREAM_END} appears, you are guaranteed to have available all -the decompressed output, and @code{BZ2_bzDecompressEnd} can safely be -called. - -In case of an error return value, you should call @code{BZ2_bzDecompressEnd} -to clean up and release memory. - -Possible return values: -@display - @code{BZ_PARAM_ERROR} - if @code{strm} is @code{NULL} or @code{strm->s} is @code{NULL} - or @code{strm->avail_out < 1} - @code{BZ_DATA_ERROR} - if a data integrity error is detected in the compressed stream - @code{BZ_DATA_ERROR_MAGIC} - if the compressed stream doesn't begin with the right magic bytes - @code{BZ_MEM_ERROR} - if there wasn't enough memory available - @code{BZ_STREAM_END} - if the logical end of the data stream was detected and all - output has been consumed, eg @code{s->avail_out > 0} - @code{BZ_OK} - otherwise -@end display -Allowable next actions: -@display - @code{BZ2_bzDecompress} - if @code{BZ_OK} was returned - @code{BZ2_bzDecompressEnd} - otherwise -@end display - - -@subsection @code{BZ2_bzDecompressEnd} -@example -int BZ2_bzDecompressEnd ( bz_stream *strm ); -@end example -Releases all memory associated with a decompression stream. - -Possible return values: -@display - @code{BZ_PARAM_ERROR} - if @code{strm} is @code{NULL} or @code{strm->s} is @code{NULL} - @code{BZ_OK} - otherwise -@end display - -Allowable next actions: -@display - None. -@end display - - -@section High-level interface - -This interface provides functions for reading and writing -@code{bzip2} format files. First, some general points. - -@itemize @bullet -@item All of the functions take an @code{int*} first argument, - @code{bzerror}.
- After each call, @code{bzerror} should be consulted first to determine - the outcome of the call. If @code{bzerror} is @code{BZ_OK}, - the call completed - successfully, and only then should the return value of the function - (if any) be consulted. If @code{bzerror} is @code{BZ_IO_ERROR}, - there was an error - reading/writing the underlying compressed file, and you should - then consult @code{errno}/@code{perror} to determine the - cause of the difficulty. - @code{bzerror} may also be set to various other values; precise details are - given on a per-function basis below. -@item If @code{bzerror} indicates an error - (ie, anything except @code{BZ_OK} and @code{BZ_STREAM_END}), - you should immediately call @code{BZ2_bzReadClose} (or @code{BZ2_bzWriteClose}, - depending on whether you are attempting to read or to write) - to free up all resources associated - with the stream. Once an error has been indicated, behaviour of all calls - except @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) is undefined. - The implication is that (1) @code{bzerror} should - be checked after each call, and (2) if @code{bzerror} indicates an error, - @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) should then be called to clean up. -@item The @code{FILE*} arguments passed to - @code{BZ2_bzReadOpen}/@code{BZ2_bzWriteOpen} - should be set to binary mode. - Most Unix systems will do this by default, but other platforms, - including Windows and Mac, will not. If you omit this, you may - encounter problems when moving code to new platforms. -@item Memory allocation requests are handled by - @code{malloc}/@code{free}. - At present - there is no facility for user-defined memory allocators in the file I/O - functions (could easily be added, though). 
-@end itemize - - - -@subsection @code{BZ2_bzReadOpen} -@example - typedef void BZFILE; - - BZFILE *BZ2_bzReadOpen ( int *bzerror, FILE *f, - int small, int verbosity, - void *unused, int nUnused ); -@end example -Prepare to read compressed data from file handle @code{f}. @code{f} -should refer to a file which has been opened for reading, and for which -the error indicator (@code{ferror(f)}) is not set. If @code{small} is 1, -the library will try to decompress using less memory, at the expense of -speed. - -For reasons explained below, @code{BZ2_bzRead} will decompress the -@code{nUnused} bytes starting at @code{unused}, before starting to read -from the file @code{f}. At most @code{BZ_MAX_UNUSED} bytes may be -supplied like this. If this facility is not required, you should pass -@code{NULL} and @code{0} for @code{unused} and @code{nUnused} -respectively. - -For the meaning of parameters @code{small} and @code{verbosity}, -see @code{BZ2_bzDecompressInit}. - -The amount of memory needed to decompress a file cannot be determined -until the file's header has been read. So it is possible that -@code{BZ2_bzReadOpen} returns @code{BZ_OK} but a subsequent call of -@code{BZ2_bzRead} will return @code{BZ_MEM_ERROR}. - -Possible assignments to @code{bzerror}: -@display - @code{BZ_CONFIG_ERROR} - if the library has been mis-compiled - @code{BZ_PARAM_ERROR} - if @code{f} is @code{NULL} - or @code{small} is neither @code{0} nor @code{1} - or @code{(unused == NULL && nUnused != 0)} - or @code{(unused != NULL && !(0 <= nUnused <= BZ_MAX_UNUSED))} - @code{BZ_IO_ERROR} - if @code{ferror(f)} is nonzero - @code{BZ_MEM_ERROR} - if insufficient memory is available - @code{BZ_OK} - otherwise.
-@end display - -Possible return values: -@display - Pointer to an abstract @code{BZFILE} - if @code{bzerror} is @code{BZ_OK} - @code{NULL} - otherwise -@end display - -Allowable next actions: -@display - @code{BZ2_bzRead} - if @code{bzerror} is @code{BZ_OK} - @code{BZ2_bzReadClose} - otherwise -@end display - - -@subsection @code{BZ2_bzRead} -@example - int BZ2_bzRead ( int *bzerror, BZFILE *b, void *buf, int len ); -@end example -Reads up to @code{len} (uncompressed) bytes from the compressed file -@code{b} into -the buffer @code{buf}. If the read was successful, -@code{bzerror} is set to @code{BZ_OK} -and the number of bytes read is returned. If the logical end-of-stream -was detected, @code{bzerror} will be set to @code{BZ_STREAM_END}, -and the number -of bytes read is returned. All other @code{bzerror} values denote an error. - -@code{BZ2_bzRead} will supply @code{len} bytes, -unless the logical stream end is detected -or an error occurs. Because of this, it is possible to detect the -stream end by observing when the number of bytes returned is -less than the number -requested. Nevertheless, this is regarded as inadvisable; you should -instead check @code{bzerror} after every call and watch out for -@code{BZ_STREAM_END}. - -Internally, @code{BZ2_bzRead} copies data from the compressed file in chunks -of size @code{BZ_MAX_UNUSED} bytes -before decompressing it. If the file contains more bytes than strictly -needed to reach the logical end-of-stream, @code{BZ2_bzRead} will almost certainly -read some of the trailing data before signalling @code{BZ_STREAM_END}. -To collect the read but unused data once @code{BZ_STREAM_END} has -appeared, call @code{BZ2_bzReadGetUnused} immediately before @code{BZ2_bzReadClose}.
- -Possible assignments to @code{bzerror}: -@display - @code{BZ_PARAM_ERROR} - if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0} - @code{BZ_SEQUENCE_ERROR} - if @code{b} was opened with @code{BZ2_bzWriteOpen} - @code{BZ_IO_ERROR} - if there is an error reading from the compressed file - @code{BZ_UNEXPECTED_EOF} - if the compressed file ended before the logical end-of-stream was detected - @code{BZ_DATA_ERROR} - if a data integrity error was detected in the compressed stream - @code{BZ_DATA_ERROR_MAGIC} - if the stream does not begin with the requisite header bytes (ie, is not - a @code{bzip2} data file). This is really a special case of @code{BZ_DATA_ERROR}. - @code{BZ_MEM_ERROR} - if insufficient memory was available - @code{BZ_STREAM_END} - if the logical end of stream was detected. - @code{BZ_OK} - otherwise. -@end display - -Possible return values: -@display - number of bytes read - if @code{bzerror} is @code{BZ_OK} or @code{BZ_STREAM_END} - undefined - otherwise -@end display - -Allowable next actions: -@display - collect data from @code{buf}, then @code{BZ2_bzRead} or @code{BZ2_bzReadClose} - if @code{bzerror} is @code{BZ_OK} - collect data from @code{buf}, then @code{BZ2_bzReadClose} or @code{BZ2_bzReadGetUnused} - if @code{bzerror} is @code{BZ_STREAM_END} - @code{BZ2_bzReadClose} - otherwise -@end display - - - -@subsection @code{BZ2_bzReadGetUnused} -@example - void BZ2_bzReadGetUnused ( int* bzerror, BZFILE *b, - void** unused, int* nUnused ); -@end example -Returns data which was read from the compressed file but was not needed -to get to the logical end-of-stream. @code{*unused} is set to the address -of the data, and @code{*nUnused} to the number of bytes. @code{*nUnused} will -be set to a value between @code{0} and @code{BZ_MAX_UNUSED} inclusive. - -This function may only be called once @code{BZ2_bzRead} has signalled -@code{BZ_STREAM_END} but before @code{BZ2_bzReadClose}.
- -Possible assignments to @code{bzerror}: -@display - @code{BZ_PARAM_ERROR} - if @code{b} is @code{NULL} - or @code{unused} is @code{NULL} or @code{nUnused} is @code{NULL} - @code{BZ_SEQUENCE_ERROR} - if @code{BZ_STREAM_END} has not been signalled - or if @code{b} was opened with @code{BZ2_bzWriteOpen} - @code{BZ_OK} - otherwise -@end display - -Allowable next actions: -@display - @code{BZ2_bzReadClose} -@end display - - -@subsection @code{BZ2_bzReadClose} -@example - void BZ2_bzReadClose ( int *bzerror, BZFILE *b ); -@end example -Releases all memory pertaining to the compressed file @code{b}. -@code{BZ2_bzReadClose} does not call @code{fclose} on the underlying file -handle, so you should do that yourself if appropriate. -@code{BZ2_bzReadClose} should be called to clean up after all error -situations. - -Possible assignments to @code{bzerror}: -@display - @code{BZ_SEQUENCE_ERROR} - if @code{b} was opened with @code{BZ2_bzWriteOpen} - @code{BZ_OK} - otherwise -@end display - -Allowable next actions: -@display - none -@end display - - - -@subsection @code{BZ2_bzWriteOpen} -@example - BZFILE *BZ2_bzWriteOpen ( int *bzerror, FILE *f, - int blockSize100k, int verbosity, - int workFactor ); -@end example -Prepare to write compressed data to file handle @code{f}. -@code{f} should refer to -a file which has been opened for writing, and for which the error -indicator (@code{ferror(f)}) is not set. - -For the meaning of parameters @code{blockSize100k}, -@code{verbosity} and @code{workFactor}, see -@* @code{BZ2_bzCompressInit}. - -All required memory is allocated at this stage, so if the call -completes successfully, @code{BZ_MEM_ERROR} cannot be signalled by a -subsequent call to @code{BZ2_bzWrite}.
- -Possible assignments to @code{bzerror}: -@display - @code{BZ_CONFIG_ERROR} - if the library has been mis-compiled - @code{BZ_PARAM_ERROR} - if @code{f} is @code{NULL} - or @code{blockSize100k < 1} or @code{blockSize100k > 9} - @code{BZ_IO_ERROR} - if @code{ferror(f)} is nonzero - @code{BZ_MEM_ERROR} - if insufficient memory is available - @code{BZ_OK} - otherwise -@end display - -Possible return values: -@display - Pointer to an abstract @code{BZFILE} - if @code{bzerror} is @code{BZ_OK} - @code{NULL} - otherwise -@end display - -Allowable next actions: -@display - @code{BZ2_bzWrite} - if @code{bzerror} is @code{BZ_OK} - (you could go directly to @code{BZ2_bzWriteClose}, but this would be pretty pointless) - @code{BZ2_bzWriteClose} - otherwise -@end display - - - -@subsection @code{BZ2_bzWrite} -@example - void BZ2_bzWrite ( int *bzerror, BZFILE *b, void *buf, int len ); -@end example -Absorbs @code{len} bytes from the buffer @code{buf}, eventually to be -compressed and written to the file. - -Possible assignments to @code{bzerror}: -@display - @code{BZ_PARAM_ERROR} - if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0} - @code{BZ_SEQUENCE_ERROR} - if b was opened with @code{BZ2_bzReadOpen} - @code{BZ_IO_ERROR} - if there is an error writing the compressed file. - @code{BZ_OK} - otherwise -@end display - - - - -@subsection @code{BZ2_bzWriteClose} -@example - void BZ2_bzWriteClose ( int *bzerror, BZFILE* f, - int abandon, - unsigned int* nbytes_in, - unsigned int* nbytes_out ); - - void BZ2_bzWriteClose64 ( int *bzerror, BZFILE* f, - int abandon, - unsigned int* nbytes_in_lo32, - unsigned int* nbytes_in_hi32, - unsigned int* nbytes_out_lo32, - unsigned int* nbytes_out_hi32 ); -@end example - -Compresses and flushes to the compressed file all data so far supplied -by @code{BZ2_bzWrite}. The logical end-of-stream markers are also written, so -subsequent calls to @code{BZ2_bzWrite} are illegal. 
All memory associated -with the compressed file @code{b} is released. -@code{fflush} is called on the -compressed file, but it is not @code{fclose}'d. - -If @code{BZ2_bzWriteClose} is called to clean up after an error, the only -action is to release the memory. The library records the error codes -issued by previous calls, so this situation will be detected -automatically. There is no attempt to complete the compression -operation, nor to @code{fflush} the compressed file. You can force this -behaviour to happen even in the case of no error, by passing a nonzero -value to @code{abandon}. - -If @code{nbytes_in} is non-null, @code{*nbytes_in} will be set to be the -total volume of uncompressed data handled. Similarly, @code{nbytes_out} -will be set to the total volume of compressed data written. For -compatibility with older versions of the library, @code{BZ2_bzWriteClose} -only yields the lower 32 bits of these counts. Use -@code{BZ2_bzWriteClose64} if you want the full 64 bit counts. These -two functions are otherwise absolutely identical. - - -Possible assignments to @code{bzerror}: -@display - @code{BZ_SEQUENCE_ERROR} - if @code{b} was opened with @code{BZ2_bzReadOpen} - @code{BZ_IO_ERROR} - if there is an error writing the compressed file - @code{BZ_OK} - otherwise -@end display - -@subsection Handling embedded compressed data streams - -The high-level library facilitates use of -@code{bzip2} data streams which form some part of a surrounding, larger -data stream. -@itemize @bullet -@item For writing, the library takes an open file handle, writes -compressed data to it, @code{fflush}es it but does not @code{fclose} it. -The calling application can write its own data before and after the -compressed data stream, using that same file handle. -@item Reading is more complex, and the facilities are not as general -as they could be since generality is hard to reconcile with efficiency. 
-@code{BZ2_bzRead} reads from the compressed file in blocks of size -@code{BZ_MAX_UNUSED} bytes, and in doing so probably will overshoot -the logical end of the compressed stream. -To recover this data once decompression has -ended, call @code{BZ2_bzReadGetUnused} after the last call of @code{BZ2_bzRead} -(the one returning @code{BZ_STREAM_END}) but before calling -@code{BZ2_bzReadClose}. -@end itemize - -This mechanism makes it easy to decompress multiple @code{bzip2} -streams placed end-to-end. At the end of one stream, when @code{BZ2_bzRead} -returns @code{BZ_STREAM_END}, call @code{BZ2_bzReadGetUnused} to collect the -unused data (copy it into your own buffer somewhere). -That data forms the start of the next compressed stream. -To start uncompressing that next stream, call @code{BZ2_bzReadOpen} again, -feeding in the unused data via the @code{unused}/@code{nUnused} -parameters. -Keep doing this until @code{BZ_STREAM_END} return coincides with the -physical end of file (@code{feof(f)}). In this situation -@code{BZ2_bzReadGetUnused} -will of course return no data. - -This should give some feel for how the high-level interface can be used. -If you require extra flexibility, you'll have to bite the bullet and get -to grips with the low-level interface.
- -@subsection Standard file-reading/writing code -Here's how you'd write data to a compressed file: -@example @code -FILE* f; -BZFILE* b; -int nBuf; -char buf[ /* whatever size you like */ ]; -int bzerror; -int nWritten; - -f = fopen ( "myfile.bz2", "w" ); -if (!f) @{ - /* handle error */ -@} -b = BZ2_bzWriteOpen ( &bzerror, f, 9 ); -if (bzerror != BZ_OK) @{ - BZ2_bzWriteClose ( b ); - /* handle error */ -@} - -while ( /* condition */ ) @{ - /* get data to write into buf, and set nBuf appropriately */ - nWritten = BZ2_bzWrite ( &bzerror, b, buf, nBuf ); - if (bzerror == BZ_IO_ERROR) @{ - BZ2_bzWriteClose ( &bzerror, b ); - /* handle error */ - @} -@} - -BZ2_bzWriteClose ( &bzerror, b ); -if (bzerror == BZ_IO_ERROR) @{ - /* handle error */ -@} -@end example -And to read from a compressed file: -@example -FILE* f; -BZFILE* b; -int nBuf; -char buf[ /* whatever size you like */ ]; -int bzerror; -int nWritten; - -f = fopen ( "myfile.bz2", "r" ); -if (!f) @{ - /* handle error */ -@} -b = BZ2_bzReadOpen ( &bzerror, f, 0, NULL, 0 ); -if (bzerror != BZ_OK) @{ - BZ2_bzReadClose ( &bzerror, b ); - /* handle error */ -@} - -bzerror = BZ_OK; -while (bzerror == BZ_OK && /* arbitrary other conditions */) @{ - nBuf = BZ2_bzRead ( &bzerror, b, buf, /* size of buf */ ); - if (bzerror == BZ_OK) @{ - /* do something with buf[0 .. nBuf-1] */ - @} -@} -if (bzerror != BZ_STREAM_END) @{ - BZ2_bzReadClose ( &bzerror, b ); - /* handle error */ -@} else @{ - BZ2_bzReadClose ( &bzerror ); -@} -@end example - - - -@section Utility functions -@subsection @code{BZ2_bzBuffToBuffCompress} -@example - int BZ2_bzBuffToBuffCompress( char* dest, - unsigned int* destLen, - char* source, - unsigned int sourceLen, - int blockSize100k, - int verbosity, - int workFactor ); -@end example -Attempts to compress the data in @code{source[0 .. sourceLen-1]} -into the destination buffer, @code{dest[0 .. *destLen-1]}. 
-If the destination buffer is big enough, @code{*destLen} is -set to the size of the compressed data, and @code{BZ_OK} is -returned. If the compressed data won't fit, @code{*destLen} -is unchanged, and @code{BZ_OUTBUFF_FULL} is returned. - -Compression in this manner is a one-shot event, done with a single call -to this function. The resulting compressed data is a complete -@code{bzip2} format data stream. There is no mechanism for making -additional calls to provide extra input data. If you want that kind of -mechanism, use the low-level interface. - -For the meaning of parameters @code{blockSize100k}, @code{verbosity} -and @code{workFactor}, @* see @code{BZ2_bzCompressInit}. - -To guarantee that the compressed data will fit in its buffer, allocate -an output buffer of size 1% larger than the uncompressed data, plus -six hundred extra bytes. - -@code{BZ2_bzBuffToBuffCompress} will not write data at or -beyond @code{dest[*destLen]}, even in case of buffer overflow. - -Possible return values: -@display - @code{BZ_CONFIG_ERROR} - if the library has been mis-compiled - @code{BZ_PARAM_ERROR} - if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL} - or @code{blockSize100k < 1} or @code{blockSize100k > 9} - or @code{verbosity < 0} or @code{verbosity > 4} - or @code{workFactor < 0} or @code{workFactor > 250} - @code{BZ_MEM_ERROR} - if insufficient memory is available - @code{BZ_OUTBUFF_FULL} - if the size of the compressed data exceeds @code{*destLen} - @code{BZ_OK} - otherwise -@end display - - - -@subsection @code{BZ2_bzBuffToBuffDecompress} -@example - int BZ2_bzBuffToBuffDecompress ( char* dest, - unsigned int* destLen, - char* source, - unsigned int sourceLen, - int small, - int verbosity ); -@end example -Attempts to decompress the data in @code{source[0 .. sourceLen-1]} -into the destination buffer, @code{dest[0 .. *destLen-1]}.
-If the destination buffer is big enough, @code{*destLen} is -set to the size of the uncompressed data, and @code{BZ_OK} is -returned. If the uncompressed data won't fit, @code{*destLen} -is unchanged, and @code{BZ_OUTBUFF_FULL} is returned. - -@code{source} is assumed to hold a complete @code{bzip2} format -data stream. @* @code{BZ2_bzBuffToBuffDecompress} tries to decompress -the entirety of the stream into the output buffer. - -For the meaning of parameters @code{small} and @code{verbosity}, -see @code{BZ2_bzDecompressInit}. - -Because the compression ratio of the compressed data cannot be known in -advance, there is no easy way to guarantee that the output buffer will -be big enough. You may of course make arrangements in your code to -record the size of the uncompressed data, but such a mechanism is beyond -the scope of this library. - -@code{BZ2_bzBuffToBuffDecompress} will not write data at or -beyond @code{dest[*destLen]}, even in case of buffer overflow. - -Possible return values: -@display - @code{BZ_CONFIG_ERROR} - if the library has been mis-compiled - @code{BZ_PARAM_ERROR} - if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL} - or @code{small != 0 && small != 1} - or @code{verbosity < 0} or @code{verbosity > 4} - @code{BZ_MEM_ERROR} - if insufficient memory is available - @code{BZ_OUTBUFF_FULL} - if the size of the uncompressed data exceeds @code{*destLen} - @code{BZ_DATA_ERROR} - if a data integrity error was detected in the compressed data - @code{BZ_DATA_ERROR_MAGIC} - if the compressed data doesn't begin with the right magic bytes - @code{BZ_UNEXPECTED_EOF} - if the compressed data ends unexpectedly - @code{BZ_OK} - otherwise -@end display - - - -@section @code{zlib} compatibility functions -Yoshioka Tsuneo has contributed some functions to -give better @code{zlib} compatibility.
These functions are -@code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush}, -@code{BZ2_bzclose}, -@code{BZ2_bzerror} and @code{BZ2_bzlibVersion}. -These functions are not (yet) officially part of -the library. If they break, you get to keep all the pieces. -Nevertheless, I think they work ok. -@example -typedef void BZFILE; - -const char * BZ2_bzlibVersion ( void ); -@end example -Returns a string indicating the library version. -@example -BZFILE * BZ2_bzopen ( const char *path, const char *mode ); -BZFILE * BZ2_bzdopen ( int fd, const char *mode ); -@end example -Opens a @code{.bz2} file for reading or writing, using either its name -or a pre-existing file descriptor. -Analogous to @code{fopen} and @code{fdopen}. -@example -int BZ2_bzread ( BZFILE* b, void* buf, int len ); -int BZ2_bzwrite ( BZFILE* b, void* buf, int len ); -@end example -Reads/writes data from/to a previously opened @code{BZFILE}. -Analogous to @code{fread} and @code{fwrite}. -@example -int BZ2_bzflush ( BZFILE* b ); -void BZ2_bzclose ( BZFILE* b ); -@end example -Flushes/closes a @code{BZFILE}. @code{BZ2_bzflush} doesn't actually do -anything. Analogous to @code{fflush} and @code{fclose}. - -@example -const char * BZ2_bzerror ( BZFILE *b, int *errnum ) -@end example -Returns a string describing the most recent error status of -@code{b}, and also sets @code{*errnum} to its numerical value. - - -@section Using the library in a @code{stdio}-free environment - -@subsection Getting rid of @code{stdio} - -In a deeply embedded application, you might want to use just -the memory-to-memory functions. You can do this conveniently -by compiling the library with preprocessor symbol @code{BZ_NO_STDIO} -defined.
Doing this gives you a library containing only the following -eight functions: - -@code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, @code{BZ2_bzCompressEnd} @* -@code{BZ2_bzDecompressInit}, @code{BZ2_bzDecompress}, @code{BZ2_bzDecompressEnd} @* -@code{BZ2_bzBuffToBuffCompress}, @code{BZ2_bzBuffToBuffDecompress} - -When compiled like this, all functions will ignore @code{verbosity} -settings. - -@subsection Critical error handling -@code{libbzip2} contains a number of internal assertion checks which -should, needless to say, never be activated. Nevertheless, if an -assertion should fail, behaviour depends on whether or not the library -was compiled with @code{BZ_NO_STDIO} set. - -For a normal compile, an assertion failure yields the message -@example - bzip2/libbzip2: internal error number N. - This is a bug in bzip2/libbzip2, 1.0.2, 30-Dec-2001. - Please report it to me at: jseward@@acm.org. If this happened - when you were using some program which uses libbzip2 as a - component, you should also report this bug to the author(s) - of that program. Please make an effort to report this bug; - timely and accurate bug reports eventually lead to higher - quality software. Thanks. Julian Seward, 30 December 2001. -@end example -where @code{N} is some error code number. If @code{N == 1007}, it also -prints some extra text advising the reader that unreliable memory is -often associated with internal error 1007. (This is a -frequently-observed-phenomenon with versions 1.0.0/1.0.1). - -@code{exit(3)} is then called. - -For a @code{stdio}-free library, assertion failures result -in a call to a function declared as: -@example - extern void bz_internal_error ( int errcode ); -@end example -The relevant code is passed as a parameter. You should supply -such a function. - -In either case, once an assertion failure has occurred, any -@code{bz_stream} records involved can be regarded as invalid. -You should not attempt to resume normal operation with them. 
- -You may, of course, change critical error handling to suit -your needs. As I said above, critical errors indicate bugs -in the library and should not occur. All "normal" error -situations are indicated via error return codes from functions, -and can be recovered from. - - -@section Making a Windows DLL -Everything related to Windows has been contributed by Yoshioka Tsuneo -@* (@code{QWF00133@@niftyserve.or.jp} / -@code{tsuneo-y@@is.aist-nara.ac.jp}), so you should send your queries to -him (but perhaps Cc: me, @code{jseward@@acm.org}). - -My vague understanding of what to do is: using Visual C++ 5.0, -open the project file @code{libbz2.dsp}, and build. That's all. - -If you can't -open the project file for some reason, make a new one, naming these files: -@code{blocksort.c}, @code{bzlib.c}, @code{compress.c}, -@code{crctable.c}, @code{decompress.c}, @code{huffman.c}, @* -@code{randtable.c} and @code{libbz2.def}. You will also need -to name the header files @code{bzlib.h} and @code{bzlib_private.h}. - -If you don't use VC++, you may need to define the preprocessor symbol -@code{_WIN32}. - -Finally, @code{dlltest.c} is a sample program using the DLL. It has a -project file, @code{dlltest.dsp}. - -If you just want a makefile for Visual C, have a look at -@code{makefile.msc}. - -Be aware that if you compile @code{bzip2} itself on Win32, you must set -@code{BZ_UNIX} to 0 and @code{BZ_LCCWIN32} to 1, in the file -@code{bzip2.c}, before compiling. Otherwise the resulting binary won't -work correctly. - -I haven't tried any of this stuff myself, but it all looks plausible. - - - -@chapter Miscellanea - -These are just some random thoughts of mine. Your mileage may -vary. - -@section Limitations of the compressed file format -@code{bzip2-1.0}, @code{0.9.5} and @code{0.9.0} -use exactly the same file format as the previous -version, @code{bzip2-0.1}. This decision was made in the interests of -stability.
Creating yet another incompatible compressed file format -would create further confusion and disruption for users. - -Nevertheless, this is not a painless decision. Development -work since the release of @code{bzip2-0.1} in August 1997 -has shown complexities in the file format which slow down -decompression and, in retrospect, are unnecessary. These are: -@itemize @bullet -@item The run-length encoder, which is the first of the - compression transformations, is entirely irrelevant. - The original purpose was to protect the sorting algorithm - from the very worst case input: a string of repeated - symbols. But algorithm steps Q6a and Q6b in the original - Burrows-Wheeler technical report (SRC-124) show how - repeats can be handled without difficulty in block - sorting. -@item The randomisation mechanism doesn't really need to be - there. Udi Manber and Gene Myers published a suffix - array construction algorithm a few years back, which - can be employed to sort any block, no matter how - repetitive, in O(N log N) time. Subsequent work by - Kunihiko Sadakane has produced a derivative O(N (log N)^2) - algorithm which usually outperforms the Manber-Myers - algorithm. - - I could have changed to Sadakane's algorithm, but I find - it to be slower than @code{bzip2}'s existing algorithm for - most inputs, and the randomisation mechanism protects - adequately against bad cases. I didn't think it was - a good tradeoff to make. Partly this is due to the fact - that I was not flooded with email complaints about - @code{bzip2-0.1}'s performance on repetitive data, so - perhaps it isn't a problem for real inputs. - - Probably the best long-term solution, - and the one I have incorporated into 0.9.5 and above, - is to use the existing sorting - algorithm initially, and fall back to a O(N (log N)^2) - algorithm if the standard algorithm gets into difficulties. 
-@item The compressed file format was never designed to be - handled by a library, and I have had to jump through - some hoops to produce an efficient implementation of - decompression. It's a bit hairy. Try passing - @code{decompress.c} through the C preprocessor - and you'll see what I mean. Much of this complexity - could have been avoided if the compressed size of - each block of data was recorded in the data stream. -@item An Adler-32 checksum, rather than a CRC32 checksum, - would be faster to compute. -@end itemize -It would be fair to say that the @code{bzip2} format was frozen -before I properly and fully understood the performance -consequences of doing so. - -Improvements which I was able to incorporate into -0.9.0, despite using the same file format, are: -@itemize @bullet -@item Single array implementation of the inverse BWT. This - significantly speeds up decompression, presumably - because it reduces the number of cache misses. -@item Faster inverse MTF transform for large MTF values. The - new implementation is based on the notion of sliding blocks - of values. -@item @code{bzip2-0.9.0} now reads and writes files with @code{fread} - and @code{fwrite}; version 0.1 used @code{putc} and @code{getc}. - Duh! Well, you live and learn. - -@end itemize -Further ahead, it would be nice -to be able to do random access into files. This will -require some careful design of compressed file formats. - - - -@section Portability issues -After some consideration, I have decided not to use -GNU @code{autoconf} to configure 0.9.5 or 1.0. - -@code{autoconf}, admirable and wonderful though it is, -mainly assists with portability problems between Unix-like -platforms. But @code{bzip2} doesn't have much in the way -of portability problems on Unix; most of the difficulties appear -when porting to the Mac, or to Microsoft's operating systems. -@code{autoconf} doesn't help in those cases, and brings in a -whole load of new complexity.
- -Most people should be able to compile the library and program -under Unix straight out-of-the-box, so to speak, especially -if you have a version of GNU C available. - -There are a couple of @code{__inline__} directives in the code. GNU C -(@code{gcc}) should be able to handle them. If you're not using -GNU C, your C compiler shouldn't see them at all. -If your compiler does, for some reason, see them and doesn't -like them, just @code{#define} @code{__inline__} to be @code{/* */}. One -easy way to do this is to compile with the flag @code{-D__inline__=}, -which should be understood by most Unix compilers. - -If you still have difficulties, try compiling with the macro -@code{BZ_STRICT_ANSI} defined. This should enable you to build the -library in a strictly ANSI compliant environment. Building the program -itself like this is dangerous and not supported, since you remove -@code{bzip2}'s checks against compressing directories, symbolic links, -devices, and other not-really-a-file entities. This could cause -filesystem corruption! - -One other thing: if you create a @code{bzip2} binary for public -distribution, please try and link it statically (@code{gcc -static}). This -avoids all sorts of library-version issues that others may encounter -later on. - -If you build @code{bzip2} on Win32, you must set @code{BZ_UNIX} to 0 and -@code{BZ_LCCWIN32} to 1, in the file @code{bzip2.c}, before compiling. -Otherwise the resulting binary won't work correctly. - - - -@section Reporting bugs -I tried pretty hard to make sure @code{bzip2} is -bug free, both by design and by testing. Hopefully -you'll never need to read this section for real. - -Nevertheless, if @code{bzip2} dies with a segmentation -fault, a bus error or an internal assertion failure, it -will ask you to email me a bug report. Experience with -version 0.1 shows that almost all these problems can -be traced to either compiler bugs or hardware problems.
-@itemize @bullet -@item -Recompile the program with no optimisation, and see if it -works. And/or try a different compiler. -I heard all sorts of stories about various flavours -of GNU C (and other compilers) generating bad code for -@code{bzip2}, and I've run across two such examples myself. - -2.7.X versions of GNU C are known to generate bad code from -time to time, at high optimisation levels. -If you get problems, try using the flags -@code{-O2} @code{-fomit-frame-pointer} @code{-fno-strength-reduce}. -You should specifically @emph{not} use @code{-funroll-loops}. - -You may notice that the Makefile runs six tests as part of -the build process. If the program passes all of these, it's -a pretty good (but not 100%) indication that the compiler has -done its job correctly. -@item -If @code{bzip2} crashes randomly, and the crashes are not -repeatable, you may have a flaky memory subsystem. @code{bzip2} -really hammers your memory hierarchy, and if it's a bit marginal, -you may get these problems. Ditto if your disk or I/O subsystem -is slowly failing. Yup, this really does happen. - -Try using a different machine of the same type, and see if -you can repeat the problem. -@item This isn't really a bug, but ... If @code{bzip2} tells -you your file is corrupted on decompression, and you -obtained the file via FTP, there is a possibility that you -forgot to tell FTP to do a binary mode transfer. That absolutely -will cause the file to be non-decompressible. You'll have to transfer -it again. -@end itemize - -If you've incorporated @code{libbzip2} into your own program -and are getting problems, please, please, please, check that the -parameters you are passing in calls to the library, are -correct, and in accordance with what the documentation says -is allowable. I have tried to make the library robust against -such problems, but I'm sure I haven't succeeded. - -Finally, if the above comments don't help, you'll have to send -me a bug report. 
Now, it's just amazing how many people will -send me a bug report saying something like -@display - bzip2 crashed with segmentation fault on my machine -@end display -and absolutely nothing else. Needless to say, a such a report -is @emph{totally, utterly, completely and comprehensively 100% useless; -a waste of your time, my time, and net bandwidth}. -With no details at all, there's no way I can possibly begin -to figure out what the problem is. - -The rules of the game are: facts, facts, facts. Don't omit -them because "oh, they won't be relevant". At the bare -minimum: -@display - Machine type. Operating system version. - Exact version of @code{bzip2} (do @code{bzip2 -V}). - Exact version of the compiler used. - Flags passed to the compiler. -@end display -However, the most important single thing that will help me is -the file that you were trying to compress or decompress at the -time the problem happened. Without that, my ability to do anything -more than speculate about the cause, is limited. - -Please remember that I connect to the Internet with a modem, so -you should contact me before mailing me huge files. - - -@section Did you get the right package? - -@code{bzip2} is a resource hog. It soaks up large amounts of CPU cycles -and memory. Also, it gives very large latencies. In the worst case, you -can feed many megabytes of uncompressed data into the library before -getting any compressed output, so this probably rules out applications -requiring interactive behaviour. - -These aren't faults of my implementation, I hope, but more -an intrinsic property of the Burrows-Wheeler transform (unfortunately). -Maybe this isn't what you want. - -If you want a compressor and/or library which is faster, uses less -memory but gets pretty good compression, and has minimal latency, -consider Jean-loup -Gailly's and Mark Adler's work, @code{zlib-1.1.3} and -@code{gzip-1.2.4}. Look for them at - -@code{http://www.zlib.org} and -@code{http://www.gzip.org} respectively. 
- -For something faster and lighter still, you might try Markus F X J -Oberhumer's @code{LZO} real-time compression/decompression library, at -@* @code{http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html}. - -If you want to use the @code{bzip2} algorithms to compress small blocks -of data, 64k bytes or smaller, for example on an on-the-fly disk -compressor, you'd be well advised not to use this library. Instead, -I've made a special library tuned for that kind of use. It's part of -@code{e2compr-0.40}, an on-the-fly disk compressor for the Linux -@code{ext2} filesystem. Look at -@code{http://www.netspace.net.au/~reiter/e2compr}. - - - -@section Testing - -A record of the tests I've done. - -First, some data sets: -@itemize @bullet -@item B: a directory containing 6001 files, one for every length in the - range 0 to 6000 bytes. The files contain random lowercase - letters. 18.7 megabytes. -@item H: my home directory tree. Documents, source code, mail files, - compressed data. H contains B, and also a directory of - files designed as boundary cases for the sorting; mostly very - repetitive, nasty files. 565 megabytes. -@item A: directory tree holding various applications built from source: - @code{egcs}, @code{gcc-2.8.1}, KDE, GTK, Octave, etc. - 2200 megabytes. -@end itemize -The tests conducted are as follows. Each test means compressing -(a copy of) each file in the data set, decompressing it and -comparing it against the original. - -First, a bunch of tests with block sizes and internal buffer -sizes set very small, -to detect any problems with the -blocking and buffering mechanisms. -This required modifying the source code so as to try to -break it. -@enumerate -@item Data set H, with - buffer size of 1 byte, and block size of 23 bytes. -@item Data set B, buffer sizes 1 byte, block size 1 byte. -@item As (2) but small-mode decompression. -@item As (2) with block size 2 bytes. -@item As (2) with block size 3 bytes. -@item As (2) with block size 4 bytes. 
-@item As (2) with block size 5 bytes. -@item As (2) with block size 6 bytes and small-mode decompression. -@item H with buffer size of 1 byte, but normal block - size (up to 900000 bytes). -@end enumerate -Then some tests with unmodified source code. -@enumerate -@item H, all settings normal. -@item As (1), with small-mode decompress. -@item H, compress with flag @code{-1}. -@item H, compress with flag @code{-s}, decompress with flag @code{-s}. -@item Forwards compatibility: H, @code{bzip2-0.1pl2} compressing, - @code{bzip2-0.9.5} decompressing, all settings normal. -@item Backwards compatibility: H, @code{bzip2-0.9.5} compressing, - @code{bzip2-0.1pl2} decompressing, all settings normal. -@item Bigger tests: A, all settings normal. -@item As (7), using the fallback (Sadakane-like) sorting algorithm. -@item As (8), compress with flag @code{-1}, decompress with flag - @code{-s}. -@item H, using the fallback sorting algorithm. -@item Forwards compatibility: A, @code{bzip2-0.1pl2} compressing, - @code{bzip2-0.9.5} decompressing, all settings normal. -@item Backwards compatibility: A, @code{bzip2-0.9.5} compressing, - @code{bzip2-0.1pl2} decompressing, all settings normal. -@item Misc test: about 400 megabytes of @code{.tar} files with - @code{bzip2} compiled with Checker (a memory access error - detector, like Purify). -@item Misc tests to make sure it builds and runs ok on non-Linux/x86 - platforms. -@end enumerate -These tests were conducted on a 225 MHz IDT WinChip machine, running -Linux 2.0.36. They represent nearly a week of continuous computation. -All tests completed successfully. - - -@section Further reading -@code{bzip2} is not research work, in the sense that it doesn't present -any new ideas. Rather, it's an engineering exercise based on existing -ideas. - -Four documents describe essentially all the ideas behind @code{bzip2}: -@example -Michael Burrows and D. J. Wheeler: - "A block-sorting lossless data compression algorithm" - 10th May 1994. 
- Digital SRC Research Report 124. - ftp://ftp.digital.com/pub/DEC/SRC/research-reports/SRC-124.ps.gz - If you have trouble finding it, try searching at the - New Zealand Digital Library, http://www.nzdl.org. - -Daniel S. Hirschberg and Debra A. LeLewer - "Efficient Decoding of Prefix Codes" - Communications of the ACM, April 1990, Vol 33, Number 4. - You might be able to get an electronic copy of this - from the ACM Digital Library. - -David J. Wheeler - Program bred3.c and accompanying document bred3.ps. - This contains the idea behind the multi-table Huffman - coding scheme. - ftp://ftp.cl.cam.ac.uk/users/djw3/ - -Jon L. Bentley and Robert Sedgewick - "Fast Algorithms for Sorting and Searching Strings" - Available from Sedgewick's web page, - www.cs.princeton.edu/~rs -@end example -The following paper gives valuable additional insights into the -algorithm, but is not immediately the basis of any code -used in bzip2. -@example -Peter Fenwick: - Block Sorting Text Compression - Proceedings of the 19th Australasian Computer Science Conference, - Melbourne, Australia. Jan 31 - Feb 2, 1996. - ftp://ftp.cs.auckland.ac.nz/pub/peter-f/ACSC96paper.ps -@end example -Kunihiko Sadakane's sorting algorithm, mentioned above, -is available from: -@example -http://naomi.is.s.u-tokyo.ac.jp/~sada/papers/Sada98b.ps.gz -@end example -The Manber-Myers suffix array construction -algorithm is described in a paper -available from: -@example -http://www.cs.arizona.edu/people/gene/PAPERS/suffix.ps -@end example -Finally, the following paper documents some recent investigations -I made into the performance of sorting algorithms: -@example -Julian Seward: - On the Performance of BWT Sorting Algorithms - Proceedings of the IEEE Data Compression Conference 2000 - Snowbird, Utah. 28-30 March 2000. 
-@end example - - -@contents - -@bye - diff --git a/manual_1.html b/manual_1.html deleted file mode 100644 index 15f86c9..0000000 --- a/manual_1.html +++ /dev/null @@ -1,81 +0,0 @@ - - - - - -Untitled Document: 1. Introduction - - - - - - - - - - - - - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

1. Introduction

- -

- -bzip2 compresses files using the Burrows-Wheeler -block-sorting text compression algorithm, and Huffman coding. -Compression is generally considerably better than that -achieved by more conventional LZ77/LZ78-based compressors, -and approaches the performance of the PPM family of statistical compressors. -

- -bzip2 is built on top of libbzip2, a flexible library -for handling compressed data in the bzip2 format. This manual -describes both how to use the program and -how to work with the library interface. Most of the -manual is devoted to this library, not the program, -which is good news if your interest is only in the program. -

- -Chapter 2 describes how to use bzip2; this is the only part -you need to read if you just want to know how to operate the program. -Chapter 3 describes the programming interfaces in detail, and -Chapter 4 records some miscellaneous notes which I thought -ought to be recorded somewhere. -

- -


- - - - - - - -
[ << ][ >> ]           [Top][Contents][Index][ ? ]
-
- -This document was generated -by Julian Seward on January, 5 2002 -using texi2html - - - diff --git a/manual_2.html b/manual_2.html deleted file mode 100644 index a3bebc3..0000000 --- a/manual_2.html +++ /dev/null @@ -1,579 +0,0 @@ - - - - - -Untitled Document: 2. How to use <CODE>bzip2</CODE> - - - - - - - - - - - - - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

2. How to use bzip2

- -

- -This chapter contains a copy of the bzip2 man page, -and nothing else. -

- -

-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

NAME

- -
    -
  • bzip2, bunzip2 -- a block-sorting file compressor, v1.0.2 -
  • bzcat -- decompresses files to stdout -
  • bzip2recover -- recovers data from damaged bzip2 files -
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

SYNOPSIS

- -
    -
  • bzip2 [ -cdfkqstvzVL123456789 ] [ filenames ... ] -
  • bunzip2 [ -fkvsVL ] [ filenames ... ] -
  • bzcat [ -s ] [ filenames ... ] -
  • bzip2recover filename -
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

DESCRIPTION

- -

- -bzip2 compresses files using the Burrows-Wheeler block sorting -text compression algorithm, and Huffman coding. Compression is -generally considerably better than that achieved by more conventional -LZ77/LZ78-based compressors, and approaches the performance of the PPM -family of statistical compressors. -

- -The command-line options are deliberately very similar to those of GNU -gzip, but they are not identical. -

- -bzip2 expects a list of file names to accompany the command-line -flags. Each file is replaced by a compressed version of itself, with -the name original_name.bz2. Each compressed file has the same -modification date, permissions, and, when possible, ownership as the -corresponding original, so that these properties can be correctly -restored at decompression time. File name handling is naive in the -sense that there is no mechanism for preserving original file names, -permissions, ownerships or dates in filesystems which lack these -concepts, or have serious file name length restrictions, such as MS-DOS. -

- -bzip2 and bunzip2 will by default not overwrite existing -files. If you want this to happen, specify the -f flag. -

- -If no file names are specified, bzip2 compresses from standard -input to standard output. In this case, bzip2 will decline to -write compressed output to a terminal, as this would be entirely -incomprehensible and therefore pointless. -

- -bunzip2 (or bzip2 -d) decompresses all -specified files. Files which were not created by bzip2 -will be detected and ignored, and a warning issued. -bzip2 attempts to guess the filename for the decompressed file -from that of the compressed file as follows: -

    -
  • filename.bz2 becomes filename -
  • filename.bz becomes filename -
  • filename.tbz2 becomes filename.tar -
  • filename.tbz becomes filename.tar -
  • anyothername becomes anyothername.out -
-If the file does not end in one of the recognised endings, -.bz2, .bz, -.tbz2 or .tbz, bzip2 complains that it cannot -guess the name of the original file, and uses the original name -with .out appended. -

- -As with compression, supplying no -filenames causes decompression from standard input to standard output. -

- -bunzip2 will correctly decompress a file which is the -concatenation of two or more compressed files. The result is the -concatenation of the corresponding uncompressed files. Integrity -testing (-t) of concatenated compressed files is also supported. -

- -You can also compress or decompress files to the standard output by -giving the -c flag. Multiple files may be compressed and -decompressed like this. The resulting outputs are fed sequentially to -stdout. Compression of multiple files in this manner generates a stream -containing multiple compressed file representations. Such a stream -can be decompressed correctly only by bzip2 version 0.9.0 or -later. Earlier versions of bzip2 will stop after decompressing -the first file in the stream. -

- -bzcat (or bzip2 -dc) decompresses all specified files to -the standard output. -

- -bzip2 will read arguments from the environment variables -BZIP2 and BZIP, in that order, and will process them -before any arguments read from the command line. This gives a -convenient way to supply default arguments. -

- -Compression is always performed, even if the compressed file is slightly -larger than the original. Files of less than about one hundred bytes -tend to get larger, since the compression mechanism has a constant -overhead in the region of 50 bytes. Random data (including the output -of most file compressors) is coded at about 8.05 bits per byte, giving -an expansion of around 0.5%. -

- -As a self-check for your protection, bzip2 uses 32-bit CRCs to -make sure that the decompressed version of a file is identical to the -original. This guards against corruption of the compressed data, and -against undetected bugs in bzip2 (hopefully very unlikely). The -chance of data corruption going undetected is microscopic, about one -chance in four billion for each file processed. Be aware, though, that -the check occurs upon decompression, so it can only tell you that -something is wrong. It can't help you recover the original uncompressed -data. You can use bzip2recover to try to recover data from -damaged files. -

- -Return values: 0 for a normal exit, 1 for environmental problems (file -not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt -compressed file, 3 for an internal consistency error (eg, bug) which -caused bzip2 to panic. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

OPTIONS

- -
-
-c --stdout -
Compress or decompress to standard output. -
-d --decompress -
Force decompression. bzip2, bunzip2 and bzcat are -really the same program, and the decision about what actions to take is -done on the basis of which name is used. This flag overrides that -mechanism, and forces bzip2 to decompress. -
-z --compress -
The complement to -d: forces compression, regardless of the -invocation name. -
-t --test -
Check integrity of the specified file(s), but don't decompress them. -This really performs a trial decompression and throws away the result. -
-f --force -
Force overwrite of output files. Normally, bzip2 will not overwrite -existing output files. Also forces bzip2 to break hard links -to files, which it otherwise wouldn't do. -

- -bzip2 normally declines to decompress files which don't have the -correct magic header bytes. If forced (-f), however, it will -pass such files through unmodified. This is how GNU gzip -behaves. -

-k --keep -
Keep (don't delete) input files during compression -or decompression. -
-s --small -
Reduce memory usage, for compression, decompression and testing. Files -are decompressed and tested using a modified algorithm which only -requires 2.5 bytes per block byte. This means any file can be -decompressed in 2300k of memory, albeit at about half the normal speed. -

- -During compression, -s selects a block size of 200k, which limits -memory use to around the same figure, at the expense of your compression -ratio. In short, if your machine is low on memory (8 megabytes or -less), use -s for everything. See MEMORY MANAGEMENT below. -

-q --quiet -
Suppress non-essential warning messages. Messages pertaining to -I/O errors and other critical events will not be suppressed. -
-v --verbose -
Verbose mode -- show the compression ratio for each file processed. -Further -v's increase the verbosity level, spewing out lots of -information which is primarily of interest for diagnostic purposes. -
-L --license -V --version -
Display the software version, license terms and conditions. -
-1 (or --fast) to -9 (or --best) -
Set the block size to 100 k, 200 k .. 900 k when compressing. Has no -effect when decompressing. See MEMORY MANAGEMENT below. -The --fast and --best aliases are primarily for GNU -gzip compatibility. In particular, --fast doesn't make -things significantly faster. And --best merely selects the -default behaviour. -
-- -
Treats all subsequent arguments as file names, even if they start -with a dash. This is so you can handle files with names beginning -with a dash, for example: bzip2 -- -myfilename. -
--repetitive-fast -
--repetitive-best -
These flags are redundant in versions 0.9.5 and above. They provided -some coarse control over the behaviour of the sorting algorithm in -earlier versions, which was sometimes useful. 0.9.5 and above have an -improved algorithm which renders these flags irrelevant. -
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

MEMORY MANAGEMENT

- -

- -bzip2 compresses large files in blocks. The block size affects -both the compression ratio achieved, and the amount of memory needed for -compression and decompression. The flags -1 through -9 -specify the block size to be 100,000 bytes through 900,000 bytes (the -default) respectively. At decompression time, the block size used for -compression is read from the header of the compressed file, and -bunzip2 then allocates itself just enough memory to decompress -the file. Since block sizes are stored in compressed files, it follows -that the flags -1 to -9 are irrelevant to and so ignored -during decompression. -

- -Compression and decompression requirements, in bytes, can be estimated -as: -
 
     Compression:   400k + ( 8 x block size )
-
-     Decompression: 100k + ( 4 x block size ), or
-                    100k + ( 2.5 x block size )
-
Larger block sizes give rapidly diminishing marginal returns. Most of -the compression comes from the first two or three hundred k of block -size, a fact worth bearing in mind when using bzip2 on small machines. -It is also important to appreciate that the decompression memory -requirement is set at compression time by the choice of block size. -

- -For files compressed with the default 900k block size, bunzip2 -will require about 3700 kbytes to decompress. To support decompression -of any file on a 4 megabyte machine, bunzip2 has an option to -decompress using approximately half this amount of memory, about 2300 -kbytes. Decompression speed is also halved, so you should use this -option only where necessary. The relevant flag is -s. -

- -In general, try and use the largest block size memory constraints allow, -since that maximises the compression achieved. Compression and -decompression speed are virtually unaffected by block size. -

- -Another significant point applies to files which fit in a single block --- that means most files you'd encounter using a large block size. The -amount of real memory touched is proportional to the size of the file, -since the file is smaller than a block. For example, compressing a file -20,000 bytes long with the flag -9 will cause the compressor to -allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560 -kbytes of it. Similarly, the decompressor will allocate 3700k but only -touch 100k + 20000 * 4 = 180 kbytes. -

- -Here is a table which summarises the maximum memory usage for different -block sizes. Also recorded is the total compressed size for 14 files of -the Calgary Text Compression Corpus totalling 3,141,622 bytes. This -column gives some feel for how compression varies with block size. -These figures tend to understate the advantage of larger block sizes for -larger files, since the Corpus is dominated by smaller files. -
 
          Compress   Decompress   Decompress   Corpus
-   Flag     usage      usage       -s usage     Size
-
-    -1      1200k       500k         350k      914704
-    -2      2000k       900k         600k      877703
-    -3      2800k      1300k         850k      860338
-    -4      3600k      1700k        1100k      846899
-    -5      4400k      2100k        1350k      845160
-    -6      5200k      2500k        1600k      838626
-    -7      6100k      2900k        1850k      834096
-    -8      6800k      3300k        2100k      828642
-    -9      7600k      3700k        2350k      828642
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

RECOVERING DATA FROM DAMAGED FILES

- -

- -bzip2 compresses files in blocks, usually 900kbytes long. Each -block is handled independently. If a media or transmission error causes -a multi-block .bz2 file to become damaged, it may be possible to -recover data from the undamaged blocks in the file. -

- -The compressed representation of each block is delimited by a 48-bit -pattern, which makes it possible to find the block boundaries with -reasonable certainty. Each block also carries its own 32-bit CRC, so -damaged blocks can be distinguished from undamaged ones. -

- -bzip2recover is a simple program whose purpose is to search for -blocks in .bz2 files, and write each block out into its own -.bz2 file. You can then use bzip2 -t to test the -integrity of the resulting files, and decompress those which are -undamaged. -

- -bzip2recover -takes a single argument, the name of the damaged file, and writes a -number of files rec00001file.bz2, rec00002file.bz2, etc, -containing the extracted blocks. The output filenames are designed so -that the use of wildcards in subsequent processing -- for example, -bzip2 -dc rec*file.bz2 > recovered_data -- processes the files in -the correct order. -

- -bzip2recover should be of most use dealing with large .bz2 -files, as these will contain many blocks. It is clearly futile to use -it on damaged single-block files, since a damaged block cannot be -recovered. If you wish to minimise any potential data loss through -media or transmission errors, you might consider compressing with a -smaller block size. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

PERFORMANCE NOTES

- -

- -The sorting phase of compression gathers together similar strings in the -file. Because of this, files containing very long runs of repeated -symbols, like "aabaabaabaab ..." (repeated several hundred times) may -compress more slowly than normal. Versions 0.9.5 and above fare much -better than previous versions in this respect. The ratio between -worst-case and average-case compression time is in the region of 10:1. -For previous versions, this figure was more like 100:1. You can use the --vvvv option to monitor progress in great detail, if you want. -

- -Decompression speed is unaffected by these phenomena. -

- -bzip2 usually allocates several megabytes of memory to operate -in, and then charges all over it in a fairly random fashion. This means -that performance, both for compressing and decompressing, is largely -determined by the speed at which your machine can service cache misses. -Because of this, small changes to the code to reduce the miss rate have -been observed to give disproportionately large performance improvements. -I imagine bzip2 will perform best on machines with very large -caches. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

CAVEATS

- -

- -I/O error messages are not as helpful as they could be. bzip2 -tries hard to detect I/O errors and exit cleanly, but the details of -what the problem is sometimes seem rather misleading. -

- -This manual page pertains to version 1.0.2 of bzip2. Compressed -data created by this version is entirely forwards and backwards -compatible with the previous public releases, versions 0.1pl2, 0.9.0, -0.9.5, 1.0.0 and 1.0.1, but with the following exception: 0.9.0 and -above can correctly decompress multiple concatenated compressed files. -0.1pl2 cannot do this; it will stop after decompressing just the first -file in the stream. -

- -bzip2recover versions prior to this one, 1.0.2, used 32-bit -integers to represent bit positions in compressed files, so they could not -handle compressed files more than 512 megabytes long. Version 1.0.2 and -above use 64-bit ints on some platforms which support them (GNU -supported targets, and Windows). To establish whether or not -bzip2recover was built with such a limitation, run it without -arguments. In any event you can build yourself an unlimited version if -you can recompile it with MaybeUInt64 set to be an unsigned -64-bit integer. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

AUTHOR

- -Julian Seward, jseward@acm.org. -

- -http://sources.redhat.com/bzip2 -

- -The ideas embodied in bzip2 are due to (at least) the following -people: Michael Burrows and David Wheeler (for the block sorting -transformation), David Wheeler (again, for the Huffman coder), Peter -Fenwick (for the structured coding model in the original bzip, -and many refinements), and Alistair Moffat, Radford Neal and Ian Witten -(for the arithmetic coder in the original bzip). I am much -indebted for their help, support and advice. See the manual in the -source distribution for pointers to sources of documentation. Christian -von Roques encouraged me to look for faster sorting algorithms, so as to -speed up compression. Bela Lubkin encouraged me to improve the -worst-case compression performance. The bz* scripts are derived -from those of GNU gzip. Many people sent patches, helped with -portability problems, lent machines, gave advice and were generally -helpful. -

- -

- -
- - - - - - - -
[ << ][ >> ]           [Top][Contents][Index][ ? ]
-
- -This document was generated -by Julian Seward on January, 5 2002 -using texi2html - - - diff --git a/manual_3.html b/manual_3.html deleted file mode 100644 index 841d14d..0000000 --- a/manual_3.html +++ /dev/null @@ -1,1855 +0,0 @@ - - - - - -Untitled Document: 3. Programming with <CODE>libbzip2</CODE> - - - - - - - - - - - - - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3. Programming with libbzip2

- -

- -This chapter describes the programming interface to libbzip2. -

- -For general background information, particularly about memory -use and performance aspects, you'd be well advised to read Chapter 2 -as well. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.1 Top-level structure

- -

- -libbzip2 is a flexible library for compressing and decompressing -data in the bzip2 data format. Although packaged as a single -entity, it helps to regard the library as three separate parts: the low -level interface, the high level interface, and some utility -functions. -

- -The structure of libbzip2's interfaces is similar to -that of Jean-loup Gailly's and Mark Adler's excellent zlib -library. -

- -All externally visible symbols have names beginning BZ2_. -This is new in version 1.0. The intention is to minimise pollution -of the namespaces of library clients. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.1.1 Low-level summary

- -

- -This interface provides services for compressing and decompressing -data in memory. There's no provision for dealing with files, streams -or any other I/O mechanisms, just straight memory-to-memory work. -In fact, this part of the library can be compiled without inclusion -of stdio.h, which may be helpful for embedded applications. -

- -The low-level part of the library has no global variables and -is therefore thread-safe. -

- -Six routines make up the low level interface: -BZ2_bzCompressInit, BZ2_bzCompress, and
BZ2_bzCompressEnd -for compression, -and a corresponding trio BZ2_bzDecompressInit,
BZ2_bzDecompress -and BZ2_bzDecompressEnd for decompression. -The *Init functions allocate -memory for compression/decompression and do other -initialisations, whilst the *End functions close down operations -and release memory. -

- -The real work is done by BZ2_bzCompress and BZ2_bzDecompress. -These compress and decompress data from a user-supplied input buffer -to a user-supplied output buffer. These buffers can be any size; -arbitrary quantities of data are handled by making repeated calls -to these functions. This is a flexible mechanism allowing a -consumer-pull style of activity, or producer-push, or a mixture of -both. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.1.2 High-level summary

- -

- -This interface provides some handy wrappers around the low-level -interface to facilitate reading and writing bzip2 format -files (.bz2 files). The routines provide hooks to facilitate -reading files in which the bzip2 data stream is embedded -within some larger-scale file structure, or where there are -multiple bzip2 data streams concatenated end-to-end. -

- -For reading files, BZ2_bzReadOpen, BZ2_bzRead, -BZ2_bzReadClose and
BZ2_bzReadGetUnused are supplied. For -writing files, BZ2_bzWriteOpen, BZ2_bzWrite and -BZ2_bzWriteFinish are available. -

- -As with the low-level library, no global variables are used -so the library is per se thread-safe. However, if I/O errors -occur whilst reading or writing the underlying compressed files, -you may have to consult errno to determine the cause of -the error. In that case, you'd need a C library which correctly -supports errno in a multithreaded environment. -

- -To make the library a little simpler and more portable, -BZ2_bzReadOpen and BZ2_bzWriteOpen require you to pass them file -handles (FILE*s) which have previously been opened for reading or -writing respectively. That avoids portability problems associated with -file operations and file attributes, whilst not being much of an -imposition on the programmer. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.1.3 Utility functions summary

- -For very simple needs, BZ2_bzBuffToBuffCompress and -BZ2_bzBuffToBuffDecompress are provided. These compress -data in memory from one buffer to another buffer in a single -function call. You should assess whether these functions -fulfill your memory-to-memory compression/decompression -requirements before investing effort in understanding the more -general but more complex low-level interface. -

- -Yoshioka Tsuneo (QWF00133@niftyserve.or.jp / -tsuneo-y@is.aist-nara.ac.jp) has contributed some functions to -give better zlib compatibility. These functions are -BZ2_bzopen, BZ2_bzread, BZ2_bzwrite, BZ2_bzflush, -BZ2_bzclose, -BZ2_bzerror and BZ2_bzlibVersion. You may find these functions -more convenient for simple file reading and writing, than those in the -high-level interface. These functions are not (yet) officially part of -the library, and are minimally documented here. If they break, you -get to keep all the pieces. I hope to document them properly when time -permits. -

- -Yoshioka also contributed modifications to allow the library to be -built as a Windows DLL. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.2 Error handling

- -

- -The library is designed to recover cleanly in all situations, including -the worst-case situation of decompressing random data. I'm not -100% sure that it can always do this, so you might want to add -a signal handler to catch segmentation violations during decompression -if you are feeling especially paranoid. I would be interested in -hearing more about the robustness of the library to corrupted -compressed data. -

- -Version 1.0 is much more robust in this respect than -0.9.0 or 0.9.5. Investigations with Checker (a tool for -detecting problems with memory management, similar to Purify) -indicate that, at least for the few files I tested, all single-bit -errors in the compressed data are caught properly, with no -segmentation faults, no reads of uninitialised data and no -out of range reads or writes. So it's certainly much improved, -although I wouldn't claim it to be totally bombproof. -

- -The file bzlib.h contains all definitions needed to use -the library. In particular, you should definitely not include -bzlib_private.h. -

- -In bzlib.h, the various return values are defined. The following -list is not intended as an exhaustive description of the circumstances -in which a given value may be returned -- those descriptions are given -later. Rather, it is intended to convey the rough meaning of each -return value. The first five values are normal and not intended to -denote an error situation. -

-
BZ_OK -
The requested action was completed successfully. -
BZ_RUN_OK -
BZ_FLUSH_OK -
BZ_FINISH_OK -
In BZ2_bzCompress, the requested flush/finish/nothing-special action -was completed successfully. -
BZ_STREAM_END -
Compression of data was completed, or the logical stream end was -detected during decompression. -
-

- -The following return values indicate an error of some kind. -

-
BZ_CONFIG_ERROR -
Indicates that the library has been improperly compiled on your -platform -- a major configuration error. Specifically, it means -that sizeof(char), sizeof(short) and sizeof(int) -are not 1, 2 and 4 respectively, as they should be. Note that the -library should still work properly on 64-bit platforms which follow -the LP64 programming model -- that is, where sizeof(long) -and sizeof(void*) are 8. Under LP64, sizeof(int) is -still 4, so libbzip2, which doesn't use the long type, -is OK. -
BZ_SEQUENCE_ERROR -
When using the library, it is important to call the functions in the -correct sequence and with data structures (buffers etc) in the correct -states. libbzip2 checks as much as it can to ensure this is -happening, and returns BZ_SEQUENCE_ERROR if not. Code which -complies precisely with the function semantics, as detailed below, -should never receive this value; such an event denotes buggy code -which you should investigate. -
BZ_PARAM_ERROR -
Returned when a parameter to a function call is out of range -or otherwise manifestly incorrect. As with BZ_SEQUENCE_ERROR, -this denotes a bug in the client code. The distinction between -BZ_PARAM_ERROR and BZ_SEQUENCE_ERROR is a bit hazy, but still worth -making. -
BZ_MEM_ERROR -
Returned when a request to allocate memory failed. Note that the -quantity of memory needed to decompress a stream cannot be determined -until the stream's header has been read. So BZ2_bzDecompress and -BZ2_bzRead may return BZ_MEM_ERROR even though some of -the compressed data has been read. The same is not true for -compression; once BZ2_bzCompressInit or BZ2_bzWriteOpen have -successfully completed, BZ_MEM_ERROR cannot occur. -
BZ_DATA_ERROR -
Returned when a data integrity error is detected during decompression. -Most importantly, this means when stored and computed CRCs for the -data do not match. This value is also returned upon detection of any -other anomaly in the compressed data. -
BZ_DATA_ERROR_MAGIC -
As a special case of BZ_DATA_ERROR, it is sometimes useful to -know when the compressed stream does not start with the correct -magic bytes ('B' 'Z' 'h'). -
BZ_IO_ERROR -
Returned by BZ2_bzRead and BZ2_bzWrite when there is an error -reading or writing in the compressed file, and by BZ2_bzReadOpen -and BZ2_bzWriteOpen for attempts to use a file for which the -error indicator (viz, ferror(f)) is set. -On receipt of BZ_IO_ERROR, the caller should consult -errno and/or perror to acquire operating-system -specific information about the problem. -
BZ_UNEXPECTED_EOF -
Returned by BZ2_bzRead when the compressed file finishes -before the logical end of stream is detected. -
BZ_OUTBUFF_FULL -
Returned by BZ2_bzBuffToBuffCompress and -BZ2_bzBuffToBuffDecompress to indicate that the output data -will not fit into the output buffer provided. -
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.3 Low-level interface

- -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.3.1 BZ2_bzCompressInit

- -
 
typedef 
-   struct {
-      char *next_in;
-      unsigned int avail_in;
-      unsigned int total_in_lo32;
-      unsigned int total_in_hi32;
-
-      char *next_out;
-      unsigned int avail_out;
-      unsigned int total_out_lo32;
-      unsigned int total_out_hi32;
-
-      void *state;
-
-      void *(*bzalloc)(void *,int,int);
-      void (*bzfree)(void *,void *);
-      void *opaque;
-   } 
-   bz_stream;
-
-int BZ2_bzCompressInit ( bz_stream *strm, 
-                         int blockSize100k, 
-                         int verbosity,
-                         int workFactor );
-
-

- -Prepares for compression. The bz_stream structure -holds all data pertaining to the compression activity. -A bz_stream structure should be allocated and initialised -prior to the call. -The fields of bz_stream -comprise the entirety of the user-visible data. state -is a pointer to the private data structures required for compression. -

- -Custom memory allocators are supported, via fields bzalloc, -bzfree, -and opaque. The value -opaque is passed as the first argument to -all calls to bzalloc and bzfree, but is -otherwise ignored by the library. -The call bzalloc ( opaque, n, m ) is expected to return a -pointer p to -n * m bytes of memory, and bzfree ( opaque, p ) -should free -that memory. -

- -If you don't want to use a custom memory allocator, set bzalloc, -bzfree and -opaque to NULL, -and the library will then use the standard malloc/free -routines. -

- -Before calling BZ2_bzCompressInit, fields bzalloc, -bzfree and opaque should -be filled appropriately, as just described. Upon return, the internal -state will have been allocated and initialised, and total_in_lo32, -total_in_hi32, total_out_lo32 and -total_out_hi32 will have been set to zero. -These four fields are used by the library -to inform the caller of the total amount of data passed into and out of -the library, respectively. You should not try to change them. -As of version 1.0, 64-bit counts are maintained, even on 32-bit -platforms, using the _hi32 fields to store the upper 32 bits -of the count. So, for example, the total amount of data in -is (total_in_hi32 << 32) + total_in_lo32. -

- -Parameter blockSize100k specifies the block size to be used for -compression. It should be a value between 1 and 9 inclusive, and the -actual block size used is 100000 x this figure. 9 gives the best -compression but takes most memory. -

- -Parameter verbosity should be set to a number between 0 and 4 -inclusive. 0 is silent, and greater numbers give increasingly verbose -monitoring/debugging output. If the library has been compiled with --DBZ_NO_STDIO, no such output will appear for any verbosity -setting. -

- -Parameter workFactor controls how the compression phase behaves -when presented with worst case, highly repetitive, input data. If -compression runs into difficulties caused by repetitive data, the -library switches from the standard sorting algorithm to a fallback -algorithm. The fallback is slower than the standard algorithm by -perhaps a factor of three, but always behaves reasonably, no matter how -bad the input. -

- -Lower values of workFactor reduce the amount of effort the -standard algorithm will expend before resorting to the fallback. You -should set this parameter carefully; too low, and many inputs will be -handled by the fallback algorithm and so compress rather slowly, too -high, and your average-to-worst case compression times can become very -large. The default value of 30 gives reasonable behaviour over a wide -range of circumstances. -

- -Allowable values range from 0 to 250 inclusive. 0 is a special case, -equivalent to using the default value of 30. -

- -Note that the compressed output generated is the same regardless of -whether or not the fallback algorithm is used. -

- -Be aware also that this parameter may disappear entirely in future -versions of the library. In principle it should be possible to devise a -good way to automatically choose which algorithm to use. Such a -mechanism would render the parameter obsolete. -

- -Possible return values: -
 
      BZ_CONFIG_ERROR
-         if the library has been mis-compiled
-      BZ_PARAM_ERROR 
-         if strm is NULL 
-         or blockSize100k < 1 or blockSize100k > 9
-         or verbosity < 0 or verbosity > 4
-         or workFactor < 0 or workFactor > 250
-      BZ_MEM_ERROR 
-         if not enough memory is available
-      BZ_OK 
-         otherwise
-
Allowable next actions: -
 
      BZ2_bzCompress 
-         if BZ_OK is returned
-      no specific action needed in case of error
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.3.2 BZ2_bzCompress

- -
 
   int BZ2_bzCompress ( bz_stream *strm, int action );
-
Provides more input and/or output buffer space for the library. The -caller maintains input and output buffers, and calls BZ2_bzCompress to -transfer data between them. -

- -Before each call to BZ2_bzCompress, next_in should point at -the data to be compressed, and avail_in should indicate how many -bytes the library may read. BZ2_bzCompress updates next_in, -avail_in and total_in to reflect the number of bytes it -has read. -

- -Similarly, next_out should point to a buffer in which the -compressed data is to be placed, with avail_out indicating how -much output space is available. BZ2_bzCompress updates -next_out, avail_out and total_out to reflect the -number of bytes output. -

- -You may provide and remove as little or as much data as you like on each -call of BZ2_bzCompress. In the limit, it is acceptable to supply and -remove data one byte at a time, although this would be terribly -inefficient. You should always ensure that at least one byte of output -space is available at each call. -

- -A second purpose of BZ2_bzCompress is to request a change of mode of the -compressed stream. -

- -Conceptually, a compressed stream can be in one of four states: IDLE, -RUNNING, FLUSHING and FINISHING. Before initialisation -(BZ2_bzCompressInit) and after termination (BZ2_bzCompressEnd), a -stream is regarded as IDLE. -

- -Upon initialisation (BZ2_bzCompressInit), the stream is placed in the -RUNNING state. Subsequent calls to BZ2_bzCompress should pass -BZ_RUN as the requested action; other actions are illegal and -will result in BZ_SEQUENCE_ERROR. -

- -At some point, the calling program will have provided all the input data -it wants to. It will then want to finish up -- in effect, asking the -library to process any data it might have buffered internally. In this -state, BZ2_bzCompress will no longer attempt to read data from -next_in, but it will want to write data to next_out. -Because the output buffer supplied by the user can be arbitrarily small, -the finishing-up operation cannot necessarily be done with a single call -of BZ2_bzCompress. -

- -Instead, the calling program passes BZ_FINISH as an action to -BZ2_bzCompress. This changes the stream's state to FINISHING. Any -remaining input (ie, next_in[0 .. avail_in-1]) is compressed and -transferred to the output buffer. To do this, BZ2_bzCompress must be -called repeatedly until all the output has been consumed. At that -point, BZ2_bzCompress returns BZ_STREAM_END, and the stream's -state is set back to IDLE. BZ2_bzCompressEnd should then be -called. -

- -Just to make sure the calling program does not cheat, the library makes -a note of avail_in at the time of the first call to -BZ2_bzCompress which has BZ_FINISH as an action (ie, at the -time the program has announced its intention to not supply any more -input). By comparing this value with that of avail_in over -subsequent calls to BZ2_bzCompress, the library can detect any -attempts to slip in more data to compress. Any calls for which this is -detected will return BZ_SEQUENCE_ERROR. This indicates a -programming mistake which should be corrected. -

- -Instead of asking to finish, the calling program may ask -BZ2_bzCompress to take all the remaining input, compress it and -terminate the current (Burrows-Wheeler) compression block. This could -be useful for error control purposes. The mechanism is analogous to -that for finishing: call BZ2_bzCompress with an action of -BZ_FLUSH, remove output data, and persist with the -BZ_FLUSH action until the value BZ_RUN_OK is returned. As -with finishing, BZ2_bzCompress detects any attempt to provide more -input data once the flush has begun. -

- -Once the flush is complete, the stream returns to the normal RUNNING -state. -

- -This all sounds pretty complex, but isn't really. Here's a table -which shows which actions are allowable in each state, what action -will be taken, what the next state is, and what the non-error return -values are. Note that you can't explicitly ask what state the -stream is in, but nor do you need to -- it can be inferred from the -values returned by BZ2_bzCompress. -
 
IDLE/any           
-      Illegal.  IDLE state only exists after BZ2_bzCompressEnd or
-      before BZ2_bzCompressInit.
-      Return value = BZ_SEQUENCE_ERROR
-
-RUNNING/BZ_RUN     
-      Compress from next_in to next_out as much as possible.
-      Next state = RUNNING
-      Return value = BZ_RUN_OK
-
-RUNNING/BZ_FLUSH   
-      Remember current value of next_in.  Compress from next_in
-      to next_out as much as possible, but do not accept any more input.  
-      Next state = FLUSHING
-      Return value = BZ_FLUSH_OK
-
-RUNNING/BZ_FINISH  
-      Remember current value of next_in.  Compress from next_in
-      to next_out as much as possible, but do not accept any more input.
-      Next state = FINISHING
-      Return value = BZ_FINISH_OK
-
-FLUSHING/BZ_FLUSH  
-      Compress from next_in to next_out as much as possible, 
-      but do not accept any more input.  
-      If all the existing input has been used up and all compressed
-      output has been removed
-         Next state = RUNNING; Return value = BZ_RUN_OK
-      else
-         Next state = FLUSHING; Return value = BZ_FLUSH_OK
-
-FLUSHING/other     
-      Illegal.
-      Return value = BZ_SEQUENCE_ERROR
-
-FINISHING/BZ_FINISH  
-      Compress from next_in to next_out as much as possible,
-      but do not accept any more input.  
-      If all the existing input has been used up and all compressed
-      output has been removed
-         Next state = IDLE; Return value = BZ_STREAM_END
-      else
-         Next state = FINISHING; Return value = BZ_FINISH_OK
-
-FINISHING/other
-      Illegal.
-      Return value = BZ_SEQUENCE_ERROR
-

- -That still looks complicated? Well, fair enough. The usual sequence -of calls for compressing a load of data is: -

    -
  • Get started with BZ2_bzCompressInit. -
  • Shovel data in and shlurp out its compressed form using zero or more -calls of BZ2_bzCompress with action = BZ_RUN. -
  • Finish up. -Repeatedly call BZ2_bzCompress with action = BZ_FINISH, -copying out the compressed output, until BZ_STREAM_END is returned. -
  • Close up and go home. Call BZ2_bzCompressEnd. -
-If the data you want to compress fits into your input buffer all -at once, you can skip the calls of BZ2_bzCompress ( ..., BZ_RUN ) and -just do the BZ2_bzCompress ( ..., BZ_FINISH ) calls. -

- -All required memory is allocated by BZ2_bzCompressInit. The -compression library can accept any data at all (obviously). So you -shouldn't get any error return values from the BZ2_bzCompress calls. -If you do, they will be BZ_SEQUENCE_ERROR, and indicate a bug in -your programming. -

- -Trivial other possible return values: -
 
      BZ_PARAM_ERROR   
-         if strm is NULL, or strm->s is NULL
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.3.3 BZ2_bzCompressEnd

- -
 
int BZ2_bzCompressEnd ( bz_stream *strm );
-
Releases all memory associated with a compression stream. -

- -Possible return values: -
 
   BZ_PARAM_ERROR    if strm is NULL or strm->s is NULL
-   BZ_OK    otherwise
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.3.4 BZ2_bzDecompressInit

- -
 
int BZ2_bzDecompressInit ( bz_stream *strm, int verbosity, int small );
-
Prepares for decompression. As with BZ2_bzCompressInit, a -bz_stream record should be allocated and initialised before the -call. Fields bzalloc, bzfree and opaque should be -set if a custom memory allocator is required, or made NULL for -the normal malloc/free routines. Upon return, the internal -state will have been initialised, and total_in and -total_out will be zero. -

- -For the meaning of parameter verbosity, see BZ2_bzCompressInit. -

- -If small is nonzero, the library will use an alternative -decompression algorithm which uses less memory but at the cost of -decompressing more slowly (roughly speaking, half the speed, but the -maximum memory requirement drops to around 2300k). See Chapter 2 for -more information on memory management. -

- -Note that the amount of memory needed to decompress -a stream cannot be determined until the stream's header has been read, -so even if BZ2_bzDecompressInit succeeds, a subsequent -BZ2_bzDecompress could fail with BZ_MEM_ERROR. -

- -Possible return values: -
 
      BZ_CONFIG_ERROR
-         if the library has been mis-compiled
-      BZ_PARAM_ERROR
-         if (small != 0 && small != 1)
-         or (verbosity < 0 || verbosity > 4)
-      BZ_MEM_ERROR
-         if insufficient memory is available
-

- -Allowable next actions: -
 
      BZ2_bzDecompress
-         if BZ_OK was returned
-      no specific action required in case of error
-

- - -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.3.5 BZ2_bzDecompress

- -
 
int BZ2_bzDecompress ( bz_stream *strm );
-
Provides more input and/or output buffer space for the library. The -caller maintains input and output buffers, and uses BZ2_bzDecompress -to transfer data between them. -

- -Before each call to BZ2_bzDecompress, next_in -should point at the compressed data, -and avail_in should indicate how many bytes the library -may read. BZ2_bzDecompress updates next_in, avail_in -and total_in -to reflect the number of bytes it has read. -

- -Similarly, next_out should point to a buffer in which the uncompressed -output is to be placed, with avail_out indicating how much output space -is available. BZ2_bzDecompress updates next_out, -avail_out and total_out to reflect -the number of bytes output. -

- -You may provide and remove as little or as much data as you like on -each call of BZ2_bzDecompress. -In the limit, it is acceptable to -supply and remove data one byte at a time, although this would be -terribly inefficient. You should always ensure that at least one -byte of output space is available at each call. -

- -Use of BZ2_bzDecompress is simpler than BZ2_bzCompress. -

- -You should provide input and remove output as described above, and -repeatedly call BZ2_bzDecompress until BZ_STREAM_END is -returned. Appearance of BZ_STREAM_END denotes that -BZ2_bzDecompress has detected the logical end of the compressed -stream. BZ2_bzDecompress will not produce BZ_STREAM_END until -all output data has been placed into the output buffer, so once -BZ_STREAM_END appears, you are guaranteed to have available all -the decompressed output, and BZ2_bzDecompressEnd can safely be -called. -

- -In case of an error return value, you should call BZ2_bzDecompressEnd -to clean up and release memory. -

- -Possible return values: -
 
      BZ_PARAM_ERROR
-         if strm is NULL or strm->s is NULL
-         or strm->avail_out < 1
-      BZ_DATA_ERROR
-         if a data integrity error is detected in the compressed stream
-      BZ_DATA_ERROR_MAGIC
-         if the compressed stream doesn't begin with the right magic bytes
-      BZ_MEM_ERROR
-         if there wasn't enough memory available
-      BZ_STREAM_END
-         if the logical end of the data stream was detected and all
-         output has been consumed, eg s->avail_out > 0
-      BZ_OK
-         otherwise
-
Allowable next actions: -
 
      BZ2_bzDecompress
-         if BZ_OK was returned
-      BZ2_bzDecompressEnd
-         otherwise
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.3.6 BZ2_bzDecompressEnd

- -
 
int BZ2_bzDecompressEnd ( bz_stream *strm );
-
Releases all memory associated with a decompression stream. -

- -Possible return values: -
 
      BZ_PARAM_ERROR
-         if strm is NULL or strm->s is NULL
-      BZ_OK
-         otherwise
-

- -Allowable next actions: -
 
      None.
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.4 High-level interface

- -

- -This interface provides functions for reading and writing -bzip2 format files. First, some general points. -

- -

    -
  • All of the functions take an int* first argument, - bzerror. - After each call, bzerror should be consulted first to determine - the outcome of the call. If bzerror is BZ_OK, - the call completed - successfully, and only then should the return value of the function - (if any) be consulted. If bzerror is BZ_IO_ERROR, - there was an error - reading/writing the underlying compressed file, and you should - then consult errno/perror to determine the - cause of the difficulty. - bzerror may also be set to various other values; precise details are - given on a per-function basis below. -
  • If bzerror indicates an error - (ie, anything except BZ_OK and BZ_STREAM_END), - you should immediately call BZ2_bzReadClose (or BZ2_bzWriteClose, - depending on whether you are attempting to read or to write) - to free up all resources associated - with the stream. Once an error has been indicated, behaviour of all calls - except BZ2_bzReadClose (BZ2_bzWriteClose) is undefined. - The implication is that (1) bzerror should - be checked after each call, and (2) if bzerror indicates an error, - BZ2_bzReadClose (BZ2_bzWriteClose) should then be called to clean up. -
  • The FILE* arguments passed to - BZ2_bzReadOpen/BZ2_bzWriteOpen - should be set to binary mode. - Most Unix systems will do this by default, but other platforms, - including Windows and Mac, will not. If you omit this, you may - encounter problems when moving code to new platforms. -
  • Memory allocation requests are handled by - malloc/free. - At present - there is no facility for user-defined memory allocators in the file I/O - functions (could easily be added, though). -
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.4.1 BZ2_bzReadOpen

- -
 
   typedef void BZFILE;
-
-   BZFILE *BZ2_bzReadOpen ( int *bzerror, FILE *f, 
-                            int small, int verbosity,
-                            void *unused, int nUnused );
-
Prepare to read compressed data from file handle f. f -should refer to a file which has been opened for reading, and for which -the error indicator (ferror(f)) is not set. If small is 1, -the library will try to decompress using less memory, at the expense of -speed. -

- -For reasons explained below, BZ2_bzRead will decompress the -nUnused bytes starting at unused, before starting to read -from the file f. At most BZ_MAX_UNUSED bytes may be -supplied like this. If this facility is not required, you should pass -NULL and 0 for unused and nUnused -respectively. -

- -For the meaning of parameters small and verbosity, -see BZ2_bzDecompressInit. -

- -The amount of memory needed to decompress a file cannot be determined -until the file's header has been read. So it is possible that -BZ2_bzReadOpen returns BZ_OK but a subsequent call of -BZ2_bzRead will return BZ_MEM_ERROR. -

- -Possible assignments to bzerror: -
 
      BZ_CONFIG_ERROR
-         if the library has been mis-compiled
-      BZ_PARAM_ERROR
-         if f is NULL 
-         or small is neither 0 nor 1                 
-         or (unused == NULL && nUnused != 0)
-         or (unused != NULL && !(0 <= nUnused <= BZ_MAX_UNUSED))
-      BZ_IO_ERROR    
-         if ferror(f) is nonzero
-      BZ_MEM_ERROR   
-         if insufficient memory is available
-      BZ_OK
-         otherwise.
-

- -Possible return values: -
 
      Pointer to an abstract BZFILE        
-         if bzerror is BZ_OK   
-      NULL
-         otherwise
-

- -Allowable next actions: -
 
      BZ2_bzRead
-         if bzerror is BZ_OK   
-      BZ2_bzReadClose 
-         otherwise
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.4.2 BZ2_bzRead

- -
 
   int BZ2_bzRead ( int *bzerror, BZFILE *b, void *buf, int len );
-
Reads up to len (uncompressed) bytes from the compressed file -b into -the buffer buf. If the read was successful, -bzerror is set to BZ_OK -and the number of bytes read is returned. If the logical end-of-stream -was detected, bzerror will be set to BZ_STREAM_END, -and the number -of bytes read is returned. All other bzerror values denote an error. -

- -BZ2_bzRead will supply len bytes, -unless the logical stream end is detected -or an error occurs. Because of this, it is possible to detect the -stream end by observing when the number of bytes returned is -less than the number -requested. Nevertheless, this is regarded as inadvisable; you should -instead check bzerror after every call and watch out for -BZ_STREAM_END. -

- -Internally, BZ2_bzRead copies data from the compressed file in chunks -of size BZ_MAX_UNUSED bytes -before decompressing it. If the file contains more bytes than strictly -needed to reach the logical end-of-stream, BZ2_bzRead will almost certainly -read some of the trailing data before signalling BZ_STREAM_END. -To collect the read but unused data once BZ_STREAM_END has -appeared, call BZ2_bzReadGetUnused immediately before BZ2_bzReadClose. -

- -Possible assignments to bzerror: -
 
      BZ_PARAM_ERROR
-         if b is NULL or buf is NULL or len < 0
-      BZ_SEQUENCE_ERROR 
-         if b was opened with BZ2_bzWriteOpen
-      BZ_IO_ERROR 
-         if there is an error reading from the compressed file
-      BZ_UNEXPECTED_EOF 
-         if the compressed file ended before the logical end-of-stream was detected
-      BZ_DATA_ERROR 
-         if a data integrity error was detected in the compressed stream
-      BZ_DATA_ERROR_MAGIC
-         if the stream does not begin with the requisite header bytes (ie, is not 
-         a bzip2 data file).  This is really a special case of BZ_DATA_ERROR.
-      BZ_MEM_ERROR 
-         if insufficient memory was available
-      BZ_STREAM_END 
-         if the logical end of stream was detected.
-      BZ_OK
-         otherwise.
-

- -Possible return values: -
 
      number of bytes read
-         if bzerror is BZ_OK or BZ_STREAM_END
-      undefined
-         otherwise
-

- -Allowable next actions: -
 
      collect data from buf, then BZ2_bzRead or BZ2_bzReadClose
-         if bzerror is BZ_OK 
-      collect data from buf, then BZ2_bzReadClose or BZ2_bzReadGetUnused 
-         if bzerror is BZ_STREAM_END   
-      BZ2_bzReadClose 
-         otherwise
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.4.3 BZ2_bzReadGetUnused

- -
 
   void BZ2_bzReadGetUnused ( int* bzerror, BZFILE *b, 
-                              void** unused, int* nUnused );
-
Returns data which was read from the compressed file but was not needed -to get to the logical end-of-stream. *unused is set to the address -of the data, and *nUnused to the number of bytes. *nUnused will -be set to a value between 0 and BZ_MAX_UNUSED inclusive. -

- -This function may only be called once BZ2_bzRead has signalled -BZ_STREAM_END but before BZ2_bzReadClose. -

- -Possible assignments to bzerror: -
 
      BZ_PARAM_ERROR 
-         if b is NULL 
-         or unused is NULL or nUnused is NULL
-      BZ_SEQUENCE_ERROR 
-         if BZ_STREAM_END has not been signalled
-         or if b was opened with BZ2_bzWriteOpen
-     BZ_OK
-         otherwise
-

- -Allowable next actions: -
 
      BZ2_bzReadClose
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.4.4 BZ2_bzReadClose

- -
 
   void BZ2_bzReadClose ( int *bzerror, BZFILE *b );
-
Releases all memory pertaining to the compressed file b. -BZ2_bzReadClose does not call fclose on the underlying file -handle, so you should do that yourself if appropriate. -BZ2_bzReadClose should be called to clean up after all error -situations. -

- -Possible assignments to bzerror: -
 
      BZ_SEQUENCE_ERROR 
-         if b was opened with BZ2_bzWriteOpen 
-      BZ_OK 
-         otherwise
-

- -Allowable next actions: -
 
      none
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.4.5 BZ2_bzWriteOpen

- -
 
   BZFILE *BZ2_bzWriteOpen ( int *bzerror, FILE *f, 
-                             int blockSize100k, int verbosity,
-                             int workFactor );
-
Prepare to write compressed data to file handle f. -f should refer to -a file which has been opened for writing, and for which the error -indicator (ferror(f)) is not set. -

- -For the meaning of parameters blockSize100k, -verbosity and workFactor, see -
BZ2_bzCompressInit. -

- -All required memory is allocated at this stage, so if the call -completes successfully, BZ_MEM_ERROR cannot be signalled by a -subsequent call to BZ2_bzWrite. -

- -Possible assignments to bzerror: -
 
      BZ_CONFIG_ERROR
-         if the library has been mis-compiled
-      BZ_PARAM_ERROR 
-         if f is NULL 
-         or blockSize100k < 1 or blockSize100k > 9
-      BZ_IO_ERROR 
-         if ferror(f) is nonzero
-      BZ_MEM_ERROR 
-         if insufficient memory is available
-      BZ_OK 
-         otherwise
-

- -Possible return values: -
 
      Pointer to an abstract BZFILE  
-         if bzerror is BZ_OK   
-      NULL 
-         otherwise
-

- -Allowable next actions: -
 
      BZ2_bzWrite 
-         if bzerror is BZ_OK 
-         (you could go directly to BZ2_bzWriteClose, but this would be pretty pointless)
-      BZ2_bzWriteClose 
-         otherwise
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.4.6 BZ2_bzWrite

- -
 
   void BZ2_bzWrite ( int *bzerror, BZFILE *b, void *buf, int len );
-
Absorbs len bytes from the buffer buf, eventually to be -compressed and written to the file. -

- -Possible assignments to bzerror: -
 
      BZ_PARAM_ERROR 
-         if b is NULL or buf is NULL or len < 0
-      BZ_SEQUENCE_ERROR 
-         if b was opened with BZ2_bzReadOpen
-      BZ_IO_ERROR 
-         if there is an error writing the compressed file.
-      BZ_OK 
-         otherwise
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.4.7 BZ2_bzWriteClose

- -
 
   void BZ2_bzWriteClose ( int *bzerror, BZFILE* f,
-                           int abandon,
-                           unsigned int* nbytes_in,
-                           unsigned int* nbytes_out );
-
-   void BZ2_bzWriteClose64 ( int *bzerror, BZFILE* f,
-                             int abandon,
-                             unsigned int* nbytes_in_lo32,
-                             unsigned int* nbytes_in_hi32,
-                             unsigned int* nbytes_out_lo32,
-                             unsigned int* nbytes_out_hi32 );
-

- -Compresses and flushes to the compressed file all data so far supplied -by BZ2_bzWrite. The logical end-of-stream markers are also written, so -subsequent calls to BZ2_bzWrite are illegal. All memory associated -with the compressed file b is released. -fflush is called on the -compressed file, but it is not fclose'd. -

- -If BZ2_bzWriteClose is called to clean up after an error, the only -action is to release the memory. The library records the error codes -issued by previous calls, so this situation will be detected -automatically. There is no attempt to complete the compression -operation, nor to fflush the compressed file. You can force this -behaviour to happen even in the case of no error, by passing a nonzero -value to abandon. -

- -If nbytes_in is non-null, *nbytes_in will be set to be the -total volume of uncompressed data handled. Similarly, nbytes_out -will be set to the total volume of compressed data written. For -compatibility with older versions of the library, BZ2_bzWriteClose -only yields the lower 32 bits of these counts. Use -BZ2_bzWriteClose64 if you want the full 64 bit counts. These -two functions are otherwise absolutely identical. -

- -Possible assignments to bzerror: -
 
      BZ_SEQUENCE_ERROR 
-         if b was opened with BZ2_bzReadOpen
-      BZ_IO_ERROR 
-         if there is an error writing the compressed file
-      BZ_OK 
-         otherwise
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.4.8 Handling embedded compressed data streams

- -

- -The high-level library facilitates use of -bzip2 data streams which form some part of a surrounding, larger -data stream. -

    -
  • For writing, the library takes an open file handle, writes -compressed data to it, fflushes it but does not fclose it. -The calling application can write its own data before and after the -compressed data stream, using that same file handle. -
  • Reading is more complex, and the facilities are not as general -as they could be since generality is hard to reconcile with efficiency. -BZ2_bzRead reads from the compressed file in blocks of size -BZ_MAX_UNUSED bytes, and in doing so probably will overshoot -the logical end of compressed stream. -To recover this data once decompression has -ended, call BZ2_bzReadGetUnused after the last call of BZ2_bzRead -(the one returning BZ_STREAM_END) but before calling -BZ2_bzReadClose. -
-

- -This mechanism makes it easy to decompress multiple bzip2 -streams placed end-to-end. At the end of one stream, when BZ2_bzRead -returns BZ_STREAM_END, call BZ2_bzReadGetUnused to collect the -unused data (copy it into your own buffer somewhere). -That data forms the start of the next compressed stream. -To start uncompressing that next stream, call BZ2_bzReadOpen again, -feeding in the unused data via the unused/nUnused -parameters. -Keep doing this until BZ_STREAM_END return coincides with the -physical end of file (feof(f)). In this situation -BZ2_bzReadGetUnused -will of course return no data. -

- -This should give some feel for how the high-level interface can be used. -If you require extra flexibility, you'll have to bite the bullet and get -to grips with the low-level interface. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.4.9 Standard file-reading/writing code

- -Here's how you'd write data to a compressed file: -
 
FILE*   f;
-BZFILE* b;
-int     nBuf;
-char    buf[ /* whatever size you like */ ];
-int     bzerror;
-int     nWritten;
-
-f = fopen ( "myfile.bz2", "w" );
-if (!f) {
-   /* handle error */
-}
-b = BZ2_bzWriteOpen ( &bzerror, f, 9, 0, 30 );
-if (bzerror != BZ_OK) {
-   BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL );
-   /* handle error */
-}
-
-while ( /* condition */ ) {
-   /* get data to write into buf, and set nBuf appropriately */
-   BZ2_bzWrite ( &bzerror, b, buf, nBuf );
-   if (bzerror == BZ_IO_ERROR) { 
-      BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL );
-      /* handle error */
-   }
-}
-
-BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL );
-if (bzerror == BZ_IO_ERROR) {
-   /* handle error */
-}
-
And to read from a compressed file: -
 
FILE*   f;
-BZFILE* b;
-int     nBuf;
-char    buf[ /* whatever size you like */ ];
-int     bzerror;
-int     nWritten;
-
-f = fopen ( "myfile.bz2", "r" );
-if (!f) {
-   /* handle error */
-}
-b = BZ2_bzReadOpen ( &bzerror, f, 0, NULL, 0 );
-if (bzerror != BZ_OK) {
-   BZ2_bzReadClose ( &bzerror, b );
-   /* handle error */
-}
-
-bzerror = BZ_OK;
-while (bzerror == BZ_OK && /* arbitrary other conditions */) {
-   nBuf = BZ2_bzRead ( &bzerror, b, buf, /* size of buf */ );
-   if (bzerror == BZ_OK) {
-      /* do something with buf[0 .. nBuf-1] */
-   }
-}
-if (bzerror != BZ_STREAM_END) {
-   BZ2_bzReadClose ( &bzerror, b );
-   /* handle error */
-} else {
-   BZ2_bzReadClose ( &bzerror, b );
-}
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.5 Utility functions

- -
- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.5.1 BZ2_bzBuffToBuffCompress

- -
 
   int BZ2_bzBuffToBuffCompress( char*         dest,
-                                 unsigned int* destLen,
-                                 char*         source,
-                                 unsigned int  sourceLen,
-                                 int           blockSize100k,
-                                 int           verbosity,
-                                 int           workFactor );
-
Attempts to compress the data in source[0 .. sourceLen-1] -into the destination buffer, dest[0 .. *destLen-1]. -If the destination buffer is big enough, *destLen is -set to the size of the compressed data, and BZ_OK is -returned. If the compressed data won't fit, *destLen -is unchanged, and BZ_OUTBUFF_FULL is returned. -

- -Compression in this manner is a one-shot event, done with a single call -to this function. The resulting compressed data is a complete -bzip2 format data stream. There is no mechanism for making -additional calls to provide extra input data. If you want that kind of -mechanism, use the low-level interface. -

- -For the meaning of parameters blockSize100k, verbosity -and workFactor,
see BZ2_bzCompressInit. -

- -To guarantee that the compressed data will fit in its buffer, allocate -an output buffer of size 1% larger than the uncompressed data, plus -six hundred extra bytes. -

- -BZ2_bzBuffToBuffCompress will not write data at or -beyond dest[*destLen], even in case of buffer overflow. -

- -Possible return values: -
 
      BZ_CONFIG_ERROR
-         if the library has been mis-compiled
-      BZ_PARAM_ERROR 
-         if dest is NULL or destLen is NULL
-         or blockSize100k < 1 or blockSize100k > 9
-         or verbosity < 0 or verbosity > 4 
-         or workFactor < 0 or workFactor > 250
-      BZ_MEM_ERROR
-         if insufficient memory is available 
-      BZ_OUTBUFF_FULL
-         if the size of the compressed data exceeds *destLen
-      BZ_OK 
-         otherwise
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.5.2 BZ2_bzBuffToBuffDecompress

- -
 
   int BZ2_bzBuffToBuffDecompress ( char*         dest,
-                                    unsigned int* destLen,
-                                    char*         source,
-                                    unsigned int  sourceLen,
-                                    int           small,
-                                    int           verbosity );
-
Attempts to decompress the data in source[0 .. sourceLen-1] -into the destination buffer, dest[0 .. *destLen-1]. -If the destination buffer is big enough, *destLen is -set to the size of the uncompressed data, and BZ_OK is -returned. If the uncompressed data won't fit, *destLen -is unchanged, and BZ_OUTBUFF_FULL is returned. -

- -source is assumed to hold a complete bzip2 format -data stream.
BZ2_bzBuffToBuffDecompress tries to decompress -the entirety of the stream into the output buffer. -

- -For the meaning of parameters small and verbosity, -see BZ2_bzDecompressInit. -

- -Because the compression ratio of the compressed data cannot be known in -advance, there is no easy way to guarantee that the output buffer will -be big enough. You may of course make arrangements in your code to -record the size of the uncompressed data, but such a mechanism is beyond -the scope of this library. -

- -BZ2_bzBuffToBuffDecompress will not write data at or -beyond dest[*destLen], even in case of buffer overflow. -

- -Possible return values: -
 
      BZ_CONFIG_ERROR
-         if the library has been mis-compiled
-      BZ_PARAM_ERROR 
-         if dest is NULL or destLen is NULL
-         or small != 0 && small != 1
-         or verbosity < 0 or verbosity > 4 
-      BZ_MEM_ERROR
-         if insufficient memory is available 
-      BZ_OUTBUFF_FULL
-         if the size of the uncompressed data exceeds *destLen
-      BZ_DATA_ERROR
-         if a data integrity error was detected in the compressed data
-      BZ_DATA_ERROR_MAGIC
-         if the compressed data doesn't begin with the right magic bytes
-      BZ_UNEXPECTED_EOF
-         if the compressed data ends unexpectedly
-      BZ_OK 
-         otherwise
-

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.6 zlib compatibility functions

- -Yoshioka Tsuneo has contributed some functions to -give better zlib compatibility. These functions are -BZ2_bzopen, BZ2_bzread, BZ2_bzwrite, BZ2_bzflush, -BZ2_bzclose, -BZ2_bzerror and BZ2_bzlibVersion. -These functions are not (yet) officially part of -the library. If they break, you get to keep all the pieces. -Nevertheless, I think they work ok. -
 
typedef void BZFILE;
-
-const char * BZ2_bzlibVersion ( void );
-
Returns a string indicating the library version. -
 
BZFILE * BZ2_bzopen  ( const char *path, const char *mode );
-BZFILE * BZ2_bzdopen ( int        fd,    const char *mode );
-
Opens a .bz2 file for reading or writing, using either its name -or a pre-existing file descriptor. -Analogous to fopen and fdopen. -
 
int BZ2_bzread  ( BZFILE* b, void* buf, int len );
-int BZ2_bzwrite ( BZFILE* b, void* buf, int len );
-
Reads/writes data from/to a previously opened BZFILE. -Analogous to fread and fwrite. -
 
int  BZ2_bzflush ( BZFILE* b );
-void BZ2_bzclose ( BZFILE* b );
-
Flushes/closes a BZFILE. BZ2_bzflush doesn't actually do -anything. Analogous to fflush and fclose. -

- -
 
const char * BZ2_bzerror ( BZFILE *b, int *errnum )
-
Returns a string describing the most recent error status of -b, and also sets *errnum to its numerical value. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.7 Using the library in a stdio-free environment

- -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.7.1 Getting rid of stdio

- -

- -In a deeply embedded application, you might want to use just -the memory-to-memory functions. You can do this conveniently -by compiling the library with preprocessor symbol BZ_NO_STDIO -defined. Doing this gives you a library containing only the following -eight functions: -

- -BZ2_bzCompressInit, BZ2_bzCompress, BZ2_bzCompressEnd
-BZ2_bzDecompressInit, BZ2_bzDecompress, BZ2_bzDecompressEnd
-BZ2_bzBuffToBuffCompress, BZ2_bzBuffToBuffDecompress -

- -When compiled like this, all functions will ignore verbosity -settings. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.7.2 Critical error handling

- -libbzip2 contains a number of internal assertion checks which -should, needless to say, never be activated. Nevertheless, if an -assertion should fail, behaviour depends on whether or not the library -was compiled with BZ_NO_STDIO set. -

- -For a normal compile, an assertion failure yields the message -
 
   bzip2/libbzip2: internal error number N.
-   This is a bug in bzip2/libbzip2, 1.0.2, 30-Dec-2001.
-   Please report it to me at: jseward@acm.org.  If this happened
-   when you were using some program which uses libbzip2 as a
-   component, you should also report this bug to the author(s)
-   of that program.  Please make an effort to report this bug;
-   timely and accurate bug reports eventually lead to higher
-   quality software.  Thanks.  Julian Seward, 30 December 2001.
-
where N is some error code number. If N == 1007, it also -prints some extra text advising the reader that unreliable memory is -often associated with internal error 1007. (This is a -frequently-observed-phenomenon with versions 1.0.0/1.0.1). -

- -exit(3) is then called. -

- -For a stdio-free library, assertion failures result -in a call to a function declared as: -
 
   extern void bz_internal_error ( int errcode );
-
The relevant code is passed as a parameter. You should supply -such a function. -

- -In either case, once an assertion failure has occurred, any -bz_stream records involved can be regarded as invalid. -You should not attempt to resume normal operation with them. -

- -You may, of course, change critical error handling to suit -your needs. As I said above, critical errors indicate bugs -in the library and should not occur. All "normal" error -situations are indicated via error return codes from functions, -and can be recovered from. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

3.8 Making a Windows DLL

- -Everything related to Windows has been contributed by Yoshioka Tsuneo -
(QWF00133@niftyserve.or.jp / -tsuneo-y@is.aist-nara.ac.jp), so you should send your queries to -him (but perhaps Cc: me, jseward@acm.org). -

- -My vague understanding of what to do is: using Visual C++ 5.0, -open the project file libbz2.dsp, and build. That's all. -

- -If you can't -open the project file for some reason, make a new one, naming these files: -blocksort.c, bzlib.c, compress.c, -crctable.c, decompress.c, huffman.c,
-randtable.c and libbz2.def. You will also need -to name the header files bzlib.h and bzlib_private.h. -

- -If you don't use VC++, you may need to define the preprocessor symbol -_WIN32. -

- -Finally, dlltest.c is a sample program using the DLL. It has a -project file, dlltest.dsp. -

- -If you just want a makefile for Visual C, have a look at -makefile.msc. -

- -Be aware that if you compile bzip2 itself on Win32, you must set -BZ_UNIX to 0 and BZ_LCCWIN32 to 1, in the file -bzip2.c, before compiling. Otherwise the resulting binary won't -work correctly. -

- -I haven't tried any of this stuff myself, but it all looks plausible. -

- -


- - - - - - - -
[ << ][ >> ]           [Top][Contents][Index][ ? ]
-
- -This document was generated -by Julian Seward on January, 5 2002 -using texi2html - - - diff --git a/manual_4.html b/manual_4.html deleted file mode 100644 index fbe3938..0000000 --- a/manual_4.html +++ /dev/null @@ -1,530 +0,0 @@ - - - - - -Untitled Document: 4. Miscellanea - - - - - - - - - - - - - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

4. Miscellanea

- -

- -These are just some random thoughts of mine. Your mileage may -vary. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

4.1 Limitations of the compressed file format

- -bzip2-1.0, 0.9.5 and 0.9.0 -use exactly the same file format as the previous -version, bzip2-0.1. This decision was made in the interests of -stability. Creating yet another incompatible compressed file format -would create further confusion and disruption for users. -

- -Nevertheless, this is not a painless decision. Development -work since the release of bzip2-0.1 in August 1997 -has shown complexities in the file format which slow down -decompression and, in retrospect, are unnecessary. These are: -

    -
  • The run-length encoder, which is the first of the - compression transformations, is entirely irrelevant. - The original purpose was to protect the sorting algorithm - from the very worst case input: a string of repeated - symbols. But algorithm steps Q6a and Q6b in the original - Burrows-Wheeler technical report (SRC-124) show how - repeats can be handled without difficulty in block - sorting. -
  • The randomisation mechanism doesn't really need to be - there. Udi Manber and Gene Myers published a suffix - array construction algorithm a few years back, which - can be employed to sort any block, no matter how - repetitive, in O(N log N) time. Subsequent work by - Kunihiko Sadakane has produced a derivative O(N (log N)^2) - algorithm which usually outperforms the Manber-Myers - algorithm. -

    - - I could have changed to Sadakane's algorithm, but I find - it to be slower than bzip2's existing algorithm for - most inputs, and the randomisation mechanism protects - adequately against bad cases. I didn't think it was - a good tradeoff to make. Partly this is due to the fact - that I was not flooded with email complaints about - bzip2-0.1's performance on repetitive data, so - perhaps it isn't a problem for real inputs. -

    - - Probably the best long-term solution, - and the one I have incorporated into 0.9.5 and above, - is to use the existing sorting - algorithm initially, and fall back to a O(N (log N)^2) - algorithm if the standard algorithm gets into difficulties. -

  • The compressed file format was never designed to be - handled by a library, and I have had to jump through - some hoops to produce an efficient implementation of - decompression. It's a bit hairy. Try passing - decompress.c through the C preprocessor - and you'll see what I mean. Much of this complexity - could have been avoided if the compressed size of - each block of data was recorded in the data stream. -
  • An Adler-32 checksum, rather than a CRC32 checksum, - would be faster to compute. -
-It would be fair to say that the bzip2 format was frozen -before I properly and fully understood the performance -consequences of doing so. -

- -Improvements which I was able to incorporate into -0.9.0, despite using the same file format, are: -

    -
  • Single array implementation of the inverse BWT. This - significantly speeds up decompression, presumably - because it reduces the number of cache misses. -
  • Faster inverse MTF transform for large MTF values. The - new implementation is based on the notion of sliding blocks - of values. -
  • bzip2-0.9.0 now reads and writes files with fread - and fwrite; version 0.1 used putc and getc. - Duh! Well, you live and learn. -

    - -

-Further ahead, it would be nice -to be able to do random access into files. This will -require some careful design of compressed file formats. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

4.2 Portability issues

- -After some consideration, I have decided not to use -GNU autoconf to configure 0.9.5 or 1.0. -

- -autoconf, admirable and wonderful though it is, -mainly assists with portability problems between Unix-like -platforms. But bzip2 doesn't have much in the way -of portability problems on Unix; most of the difficulties appear -when porting to the Mac, or to Microsoft's operating systems. -autoconf doesn't help in those cases, and brings in a -whole load of new complexity. -

- -Most people should be able to compile the library and program -under Unix straight out-of-the-box, so to speak, especially -if you have a version of GNU C available. -

- -There are a couple of __inline__ directives in the code. GNU C -(gcc) should be able to handle them. If you're not using -GNU C, your C compiler shouldn't see them at all. -If your compiler does, for some reason, see them and doesn't -like them, just #define __inline__ to be /* */. One -easy way to do this is to compile with the flag -D__inline__=, -which should be understood by most Unix compilers. -

- -If you still have difficulties, try compiling with the macro -BZ_STRICT_ANSI defined. This should enable you to build the -library in a strictly ANSI compliant environment. Building the program -itself like this is dangerous and not supported, since you remove -bzip2's checks against compressing directories, symbolic links, -devices, and other not-really-a-file entities. This could cause -filesystem corruption! -

- -One other thing: if you create a bzip2 binary for public -distribution, please try and link it statically (gcc -static). This -avoids all sorts of library-version issues that others may encounter -later on. -

- -If you build bzip2 on Win32, you must set BZ_UNIX to 0 and -BZ_LCCWIN32 to 1, in the file bzip2.c, before compiling. -Otherwise the resulting binary won't work correctly. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

4.3 Reporting bugs

- -I tried pretty hard to make sure bzip2 is -bug free, both by design and by testing. Hopefully -you'll never need to read this section for real. -

- -Nevertheless, if bzip2 dies with a segmentation -fault, a bus error or an internal assertion failure, it -will ask you to email me a bug report. Experience with -version 0.1 shows that almost all these problems can -be traced to either compiler bugs or hardware problems. -

    -
  • -Recompile the program with no optimisation, and see if it -works. And/or try a different compiler. -I heard all sorts of stories about various flavours -of GNU C (and other compilers) generating bad code for -bzip2, and I've run across two such examples myself. -

    - -2.7.X versions of GNU C are known to generate bad code from -time to time, at high optimisation levels. -If you get problems, try using the flags --O2 -fomit-frame-pointer -fno-strength-reduce. -You should specifically not use -funroll-loops. -

    - -You may notice that the Makefile runs six tests as part of -the build process. If the program passes all of these, it's -a pretty good (but not 100%) indication that the compiler has -done its job correctly. -

  • -If bzip2 crashes randomly, and the crashes are not -repeatable, you may have a flaky memory subsystem. bzip2 -really hammers your memory hierarchy, and if it's a bit marginal, -you may get these problems. Ditto if your disk or I/O subsystem -is slowly failing. Yup, this really does happen. -

    - -Try using a different machine of the same type, and see if -you can repeat the problem. -

  • This isn't really a bug, but ... If bzip2 tells -you your file is corrupted on decompression, and you -obtained the file via FTP, there is a possibility that you -forgot to tell FTP to do a binary mode transfer. That absolutely -will cause the file to be non-decompressible. You'll have to transfer -it again. -
-

- -If you've incorporated libbzip2 into your own program -and are getting problems, please, please, please, check that the -parameters you are passing in calls to the library, are -correct, and in accordance with what the documentation says -is allowable. I have tried to make the library robust against -such problems, but I'm sure I haven't succeeded. -

- -Finally, if the above comments don't help, you'll have to send -me a bug report. Now, it's just amazing how many people will -send me a bug report saying something like -
 
   bzip2 crashed with segmentation fault on my machine
-
and absolutely nothing else. Needless to say, such a report -is totally, utterly, completely and comprehensively 100% useless; -a waste of your time, my time, and net bandwidth. -With no details at all, there's no way I can possibly begin -to figure out what the problem is. -

- -The rules of the game are: facts, facts, facts. Don't omit -them because "oh, they won't be relevant". At the bare -minimum: -
 
   Machine type.  Operating system version.  
-   Exact version of bzip2 (do bzip2 -V).  
-   Exact version of the compiler used.  
-   Flags passed to the compiler.
-
However, the most important single thing that will help me is -the file that you were trying to compress or decompress at the -time the problem happened. Without that, my ability to do anything -more than speculate about the cause, is limited. -

- -Please remember that I connect to the Internet with a modem, so -you should contact me before mailing me huge files. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

4.4 Did you get the right package?

- -

- -bzip2 is a resource hog. It soaks up large amounts of CPU cycles -and memory. Also, it gives very large latencies. In the worst case, you -can feed many megabytes of uncompressed data into the library before -getting any compressed output, so this probably rules out applications -requiring interactive behaviour. -

- -These aren't faults of my implementation, I hope, but more -an intrinsic property of the Burrows-Wheeler transform (unfortunately). -Maybe this isn't what you want. -

- -If you want a compressor and/or library which is faster, uses less -memory but gets pretty good compression, and has minimal latency, -consider Jean-loup -Gailly's and Mark Adler's work, zlib-1.1.3 and -gzip-1.2.4. Look for them at -

- -http://www.zlib.org and -http://www.gzip.org respectively. -

- -For something faster and lighter still, you might try Markus F X J -Oberhumer's LZO real-time compression/decompression library, at -
http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html. -

- -If you want to use the bzip2 algorithms to compress small blocks -of data, 64k bytes or smaller, for example on an on-the-fly disk -compressor, you'd be well advised not to use this library. Instead, -I've made a special library tuned for that kind of use. It's part of -e2compr-0.40, an on-the-fly disk compressor for the Linux -ext2 filesystem. Look at -http://www.netspace.net.au/~reiter/e2compr. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

4.5 Testing

- -

- -A record of the tests I've done. -

- -First, some data sets: -

    -
  • B: a directory containing 6001 files, one for every length in the - range 0 to 6000 bytes. The files contain random lowercase - letters. 18.7 megabytes. -
  • H: my home directory tree. Documents, source code, mail files, - compressed data. H contains B, and also a directory of - files designed as boundary cases for the sorting; mostly very - repetitive, nasty files. 565 megabytes. -
  • A: directory tree holding various applications built from source: - egcs, gcc-2.8.1, KDE, GTK, Octave, etc. - 2200 megabytes. -
-The tests conducted are as follows. Each test means compressing -(a copy of) each file in the data set, decompressing it and -comparing it against the original. -

- -First, a bunch of tests with block sizes and internal buffer -sizes set very small, -to detect any problems with the -blocking and buffering mechanisms. -This required modifying the source code so as to try to -break it. -

    -
  1. Data set H, with - buffer size of 1 byte, and block size of 23 bytes. -
  2. Data set B, buffer sizes 1 byte, block size 1 byte. -
  3. As (2) but small-mode decompression. -
  4. As (2) with block size 2 bytes. -
  5. As (2) with block size 3 bytes. -
  6. As (2) with block size 4 bytes. -
  7. As (2) with block size 5 bytes. -
  8. As (2) with block size 6 bytes and small-mode decompression. -
  9. H with buffer size of 1 byte, but normal block - size (up to 900000 bytes). -
-Then some tests with unmodified source code. -
    -
  1. H, all settings normal. -
  2. As (1), with small-mode decompress. -
  3. H, compress with flag -1. -
  4. H, compress with flag -s, decompress with flag -s. -
  5. Forwards compatibility: H, bzip2-0.1pl2 compressing, - bzip2-0.9.5 decompressing, all settings normal. -
  6. Backwards compatibility: H, bzip2-0.9.5 compressing, - bzip2-0.1pl2 decompressing, all settings normal. -
  7. Bigger tests: A, all settings normal. -
  8. As (7), using the fallback (Sadakane-like) sorting algorithm. -
  9. As (8), compress with flag -1, decompress with flag - -s. -
  10. H, using the fallback sorting algorithm. -
  11. Forwards compatibility: A, bzip2-0.1pl2 compressing, - bzip2-0.9.5 decompressing, all settings normal. -
  12. Backwards compatibility: A, bzip2-0.9.5 compressing, - bzip2-0.1pl2 decompressing, all settings normal. -
  13. Misc test: about 400 megabytes of .tar files with - bzip2 compiled with Checker (a memory access error - detector, like Purify). -
  14. Misc tests to make sure it builds and runs ok on non-Linux/x86 - platforms. -
-These tests were conducted on a 225 MHz IDT WinChip machine, running -Linux 2.0.36. They represent nearly a week of continuous computation. -All tests completed successfully. -

- -


- - - - - - - - - - - -
[ < ][ > ]   [ << ][ Up ][ >> ]         [Top][Contents][Index][ ? ]
-

4.6 Further reading

- -bzip2 is not research work, in the sense that it doesn't present -any new ideas. Rather, it's an engineering exercise based on existing -ideas. -

- -Four documents describe essentially all the ideas behind bzip2: -
 
Michael Burrows and D. J. Wheeler:
-  "A block-sorting lossless data compression algorithm"
-   10th May 1994. 
-   Digital SRC Research Report 124.
-   ftp://ftp.digital.com/pub/DEC/SRC/research-reports/SRC-124.ps.gz
-   If you have trouble finding it, try searching at the
-   New Zealand Digital Library, http://www.nzdl.org.
-
-Daniel S. Hirschberg and Debra A. Lelewer
-  "Efficient Decoding of Prefix Codes"
-   Communications of the ACM, April 1990, Vol 33, Number 4.
-   You might be able to get an electronic copy of this
-      from the ACM Digital Library.
-
-David J. Wheeler
-   Program bred3.c and accompanying document bred3.ps.
-   This contains the idea behind the multi-table Huffman
-   coding scheme.
-   ftp://ftp.cl.cam.ac.uk/users/djw3/
-
-Jon L. Bentley and Robert Sedgewick
-  "Fast Algorithms for Sorting and Searching Strings"
-   Available from Sedgewick's web page,
-   www.cs.princeton.edu/~rs
-
The following paper gives valuable additional insights into the -algorithm, but is not immediately the basis of any code -used in bzip2. -
 
Peter Fenwick:
-   Block Sorting Text Compression
-   Proceedings of the 19th Australasian Computer Science Conference,
-     Melbourne, Australia.  Jan 31 - Feb 2, 1996.
-   ftp://ftp.cs.auckland.ac.nz/pub/peter-f/ACSC96paper.ps
-
Kunihiko Sadakane's sorting algorithm, mentioned above, -is available from: -
 
http://naomi.is.s.u-tokyo.ac.jp/~sada/papers/Sada98b.ps.gz
-
The Manber-Myers suffix array construction -algorithm is described in a paper -available from: -
 
http://www.cs.arizona.edu/people/gene/PAPERS/suffix.ps
-
Finally, the following paper documents some recent investigations -I made into the performance of sorting algorithms: -
 
Julian Seward:
-   On the Performance of BWT Sorting Algorithms
-   Proceedings of the IEEE Data Compression Conference 2000
-     Snowbird, Utah.  28-30 March 2000.
-

- -


- - - - - - - -
[ << ][ >> ]           [Top][Contents][Index][ ? ]
-
- -This document was generated -by Julian Seward on January, 5 2002 -using texi2html - - - diff --git a/manual_abt.html b/manual_abt.html deleted file mode 100644 index d7f5472..0000000 --- a/manual_abt.html +++ /dev/null @@ -1,201 +0,0 @@ - - - - - -Untitled Document: About this document - - - - - - - - - - - - - - - - - -
[Top][Contents][Index][ ? ]
-

About this document

-This document was generated by Julian Seward on January, 5 2002 -using texi2html -

-The buttons in the navigation panels have the following meaning: -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Button Name Go to From 1.2.3 go to
- [ < ] -Back - -previous section in reading order - -1.2.2 -
- [ > ] -Forward - -next section in reading order - -1.2.4 -
- [ << ] -FastBack - -previous or up-and-previous section - -1.1 -
- [ Up ] -Up - -up section - -1.2 -
- [ >> ] -FastForward - -next or up-and-next section - -1.3 -
- [Top] -Top - -cover (top) of document - -   -
- [Contents] -Contents - -table of contents - -   -
- [Index] -Index - -concept index - -   -
- [ ? ] -About - -this page - -   -
-

-where the Example assumes that the current position -is at Subsubsection One-Two-Three of a document of -the following structure: -
    -
  • 1. Section One
  • -
      -
    • 1.1 Subsection One-One
    • -
        -
      • ...
      • -
      -
    • 1.2 Subsection One-Two
    • -
        -
      • 1.2.1 Subsubsection One-Two-One -
      • 1.2.2 Subsubsection One-Two-Two -
      • 1.2.3 Subsubsection One-Two-Three     -<== Current Position -
      • 1.2.4 Subsubsection One-Two-Four -
      -
    • 1.3 Subsection One-Three
    • -
        -
      • ...
      • -
      -
    • 1.4 Subsection One-Four
    • -
    -
- -
-
- -This document was generated -by Julian Seward on January, 5 2002 -using texi2html - - - diff --git a/manual_ovr.html b/manual_ovr.html deleted file mode 100644 index 3b102b9..0000000 --- a/manual_ovr.html +++ /dev/null @@ -1,54 +0,0 @@ - - - - - -Untitled Document: Short Table of Contents - - - - - - - - - - - - - - - - - -
[Top][Contents][Index][ ? ]
-

Short Table of Contents

-
-1. Introduction -
-2. How to use bzip2 -
-3. Programming with libbzip2 -
-4. Miscellanea -
- -
-
-
- -This document was generated -by Julian Seward on January, 5 2002 -using texi2html - - - diff --git a/manual_toc.html b/manual_toc.html deleted file mode 100644 index bc08705..0000000 --- a/manual_toc.html +++ /dev/null @@ -1,163 +0,0 @@ - - - - - -Untitled Document: Table of Contents - - - - - - - - - - - - - - - - - -
[Top][Contents][Index][ ? ]
-

Table of Contents

- -
-
- -This document was generated -by Julian Seward on January, 5 2002 -using texi2html - - - diff --git a/mk251.c b/mk251.c deleted file mode 100644 index 205778a..0000000 --- a/mk251.c +++ /dev/null @@ -1,16 +0,0 @@ - -/* Spew out a long sequence of the byte 251. When fed to bzip2 - versions 1.0.0 or 1.0.1, causes it to die with internal error - 1007 in blocksort.c. This assertion misses an extremely rare - case, which is fixed in this version (1.0.2) and above. -*/ - -#include - -int main () -{ - int i; - for (i = 0; i < 48500000 ; i++) - putchar(251); - return 0; -} diff --git a/randtable.c b/randtable.c index 5c922e9..af5fc73 100644 --- a/randtable.c +++ b/randtable.c @@ -63,7 +63,11 @@ /*---------------------------------------------*/ -Int32 BZ2_rNums[512] = { +#ifdef __ORCAC__ +Int16 BZ2_rNums[512] = { +#else +Int32 BZ2_rNums[512] = { +#endif 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, 733, 859, 335, 708, 621, 574, 73, 654, 730, 472, diff --git a/spewG.c b/spewG.c deleted file mode 100644 index 7934e76..0000000 --- a/spewG.c +++ /dev/null @@ -1,39 +0,0 @@ - -/* spew out a thoroughly gigantic file designed so that bzip2 - can compress it reasonably rapidly. This is to help test - support for large files (> 2GB) in a reasonable amount of time. - I suggest you use the undocumented --exponential option to - bzip2 when compressing the resulting file; this saves a bit of - time. Note: *don't* bother with --exponential when compressing - Real Files; it'll just waste a lot of CPU time :-) - (but is otherwise harmless). 
-*/ - -#define _FILE_OFFSET_BITS 64 - -#include -#include - -/* The number of megabytes of junk to spew out (roughly) */ -#define MEGABYTES 5000 - -#define N_BUF 1000000 -char buf[N_BUF]; - -int main ( int argc, char** argv ) -{ - int ii, kk, p; - srandom(1); - setbuffer ( stdout, buf, N_BUF ); - for (kk = 0; kk < MEGABYTES * 515; kk+=3) { - p = 25+random()%50; - for (ii = 0; ii < p; ii++) - printf ( "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" ); - for (ii = 0; ii < p-1; ii++) - printf ( "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" ); - for (ii = 0; ii < p+1; ii++) - printf ( "ccccccccccccccccccccccccccccccccccccc" ); - } - fflush(stdout); - return 0; -} diff --git a/stristr.c b/stristr.c new file mode 100644 index 0000000..9d59094 --- /dev/null +++ b/stristr.c @@ -0,0 +1,58 @@ +/* Case-insensitive version of strstr() obtained from http://snippets.org */ + +#ifdef __ORCAC__ +segment "bzip2"; +#endif + +/* +** Designation: stristr +** +** Call syntax: char *stristr(const char *String, const char *Pattern) +** +** Description: This function is an ANSI version of strstr() with +** case insensitivity. (Functionally equivalent to +** the strcasestr function in some C libraries.) +** +** Return item: char *pointer if Pattern is found in String, else +** null pointer +** +** Rev History: 07/06/03 Stephen Heumann Used in bunzip2 for GNO +** 16/04/03 ? ? +** 16/07/97 Greg Thayer Optimized +** 07/04/95 Bob Stout ANSI-fy +** 02/03/94 Fred Cole Original +** +** Hereby donated to public domain. 
+*/ + +#include +#include +#include + +char *stristr(const char *String, const char *Pattern) +{ + char *pptr, *sptr, *start; + + for (start = (char *)String; *start != '\0'; start++) + { + /* find start of pattern in string */ + for ( ; ((*start!='\0') && (toupper(*start) != toupper(*Pattern))); + start++) + ; + + pptr = (char *)Pattern; + sptr = (char *)start; + + while (toupper(*sptr) == toupper(*pptr)) + { + sptr++; + pptr++; + + /* if end of pattern then pattern was found */ + + if ('\0' == *pptr) + return (start); + } + } + return NULL; +} diff --git a/unzcrash.c b/unzcrash.c deleted file mode 100644 index f0f17fc..0000000 --- a/unzcrash.c +++ /dev/null @@ -1,126 +0,0 @@ - -/* A test program written to test robustness to decompression of - corrupted data. Usage is - unzcrash filename - and the program will read the specified file, compress it (in memory), - and then repeatedly decompress it, each time with a different bit of - the compressed data inverted, so as to test all possible one-bit errors. - This should not cause any invalid memory accesses. If it does, - I want to know about it! - - p.s. As you can see from the above description, the process is - incredibly slow. A file of size eg 5KB will cause it to run for - many hours. -*/ - -#include -#include -#include "bzlib.h" - -#define M_BLOCK 1000000 - -typedef unsigned char uchar; - -#define M_BLOCK_OUT (M_BLOCK + 1000000) -uchar inbuf[M_BLOCK]; -uchar outbuf[M_BLOCK_OUT]; -uchar zbuf[M_BLOCK + 600 + (M_BLOCK / 100)]; - -int nIn, nOut, nZ; - -static char *bzerrorstrings[] = { - "OK" - ,"SEQUENCE_ERROR" - ,"PARAM_ERROR" - ,"MEM_ERROR" - ,"DATA_ERROR" - ,"DATA_ERROR_MAGIC" - ,"IO_ERROR" - ,"UNEXPECTED_EOF" - ,"OUTBUFF_FULL" - ,"???" /* for future */ - ,"???" /* for future */ - ,"???" /* for future */ - ,"???" /* for future */ - ,"???" /* for future */ - ,"???" 
/* for future */ -}; - -void flip_bit ( int bit ) -{ - int byteno = bit / 8; - int bitno = bit % 8; - uchar mask = 1 << bitno; - //fprintf ( stderr, "(byte %d bit %d mask %d)", - // byteno, bitno, (int)mask ); - zbuf[byteno] ^= mask; -} - -int main ( int argc, char** argv ) -{ - FILE* f; - int r; - int bit; - int i; - - if (argc != 2) { - fprintf ( stderr, "usage: unzcrash filename\n" ); - return 1; - } - - f = fopen ( argv[1], "r" ); - if (!f) { - fprintf ( stderr, "unzcrash: can't open %s\n", argv[1] ); - return 1; - } - - nIn = fread ( inbuf, 1, M_BLOCK, f ); - fprintf ( stderr, "%d bytes read\n", nIn ); - - nZ = M_BLOCK; - r = BZ2_bzBuffToBuffCompress ( - zbuf, &nZ, inbuf, nIn, 9, 0, 30 ); - - assert (r == BZ_OK); - fprintf ( stderr, "%d after compression\n", nZ ); - - for (bit = 0; bit < nZ*8; bit++) { - fprintf ( stderr, "bit %d ", bit ); - flip_bit ( bit ); - nOut = M_BLOCK_OUT; - r = BZ2_bzBuffToBuffDecompress ( - outbuf, &nOut, zbuf, nZ, 0, 0 ); - fprintf ( stderr, " %d %s ", r, bzerrorstrings[-r] ); - - if (r != BZ_OK) { - fprintf ( stderr, "\n" ); - } else { - if (nOut != nIn) { - fprintf(stderr, "nIn/nOut mismatch %d %d\n", nIn, nOut ); - return 1; - } else { - for (i = 0; i < nOut; i++) - if (inbuf[i] != outbuf[i]) { - fprintf(stderr, "mismatch at %d\n", i ); - return 1; - } - if (i == nOut) fprintf(stderr, "really ok!\n" ); - } - } - - flip_bit ( bit ); - } - -#if 0 - assert (nOut == nIn); - for (i = 0; i < nOut; i++) { - if (inbuf[i] != outbuf[i]) { - fprintf ( stderr, "difference at %d !\n", i ); - return 1; - } - } -#endif - - fprintf ( stderr, "all ok\n" ); - return 0; -} diff --git a/words0 b/words0 deleted file mode 100644 index 164a8ed..0000000 --- a/words0 +++ /dev/null @@ -1,5 +0,0 @@ - -If compilation produces errors, or a large number of warnings, -please read README.COMPILATION.PROBLEMS -- you might be able to -adjust the flags in this Makefile to improve matters. 
- diff --git a/words1 b/words1 index 2e83de9..5de4146 100644 --- a/words1 +++ b/words1 @@ -1,4 +1,4 @@ -Doing 6 tests (3 compress, 3 uncompress) ... +Doing 3 decompression tests ... If there's a problem, things might stop at this point. - + diff --git a/words2 b/words2 index 203ee39..07418d3 100644 --- a/words2 +++ b/words2 @@ -1,5 +1,3 @@ -Checking test results. If any of the four "cmp"s which follow -report any differences, something is wrong. If you can't easily -figure out what, please let me know (jseward@acm.org). - +Checking test results. If any of the three "cmp"s which +follow report any differences, something is wrong. diff --git a/words3 b/words3 index 7a6b462..2e21ecb 100644 --- a/words3 +++ b/words3 @@ -1,23 +1,16 @@ - If you got this far and the "cmp"s didn't complain, it looks -like you're in business. +like you're in business. -To install in /usr/bin, /usr/lib, /usr/man and /usr/include, type - make install -To install somewhere else, eg, /xxx/yyy/{bin,lib,man,include}, type - make install PREFIX=/xxx/yyy -If you are (justifiably) paranoid and want to see what 'make install' -is going to do, you can first do - make -n install or - make -n install PREFIX=/xxx/yyy respectively. -The -n instructs make to show the commands it would execute, but -not actually execute them. - -Instructions for use are in the preformatted manual page, in the file -bzip2.txt. For more detailed documentation, read the full manual. -It is available in Postscript form (manual.ps), PDF form (manual.pdf), -and HTML form (manual_toc.html). - -You can also do "bzip2 --help" to see some helpful information. -"bzip2 -L" displays the software license. 
+To install in /usr/local/bin and /usr/local/man, type + dmake justinstall +To install somewhere else, eg, /xxx/yyy/{bin,man}, type + dmake justinstall PREFIX=/xxx/yyy +If you are (justifiably) paranoid and want to see what +'dmake justinstall' is going to do, type + dmake -n justinstall or + dmake -n justinstall PREFIX=/xxx/yyy respectively. +The -n instructs make to show the commands it would +execute, but not actually execute them. +You can do "bunzip2 --help" to see some helpful information. +"bunzip2 -L" displays the software license. diff --git a/words4 b/words4 new file mode 100644 index 0000000..72eee7c --- /dev/null +++ b/words4 @@ -0,0 +1,10 @@ + +If all went well, bunzip2 (and bzip2recover) should +now be installed under your /usr/local hierarchy. + +Put the following line in your gshrc file so you can use bzcat: + alias bzcat "bunzip2 -c" + +Instructions for use are in the man page for bunzip2. Type + man bunzip2 +to read it.