nulib2/nufxlib/samples/Launder.c

702 lines
23 KiB
C
Raw Normal View History

2000-05-23 01:55:31 +00:00
/*
* NuFX archive manipulation library
2007-02-19 23:12:22 +00:00
* Copyright (C) 2000-2007 by Andy McFadden, All Rights Reserved.
2000-05-23 01:55:31 +00:00
* This is free software; you can redistribute it and/or modify it under the
2007-02-19 23:12:22 +00:00
* terms of the BSD License, see the file COPYING.LIB.
2000-05-23 01:55:31 +00:00
*
* Run an archive through the laundry. The net result is a duplicate
* archive that matches the original in most respects. Extracting the
* files from the duplicate will yield the same results as if they were
* extracted from the original, but the duplicate archive may differ
* in subtle ways (e.g. filename threads may be added, data may be
* recompressed).
*
* This demonstrates copying threads around, both with and without
* recompressing, between two archives that are open simultaneously. This
* also tests NufxLib's thread ordering and verifies that you can abort
* frequently with no adverse effects.
*
* NOTE: depending on the options you select, you may need to have enough
2000-05-23 01:55:31 +00:00
* memory to hold the entire uncompressed contents of the original archive.
* The memory requirements are reduced if you use the "copy only" flag, and
* are virtually eliminated if you use "frequent flush".
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "NufxLib.h"
#include "Common.h"
#define kTempFile "tmp-laundry"
2000-05-23 01:55:31 +00:00
#define kFlagCopyOnly (1)
#define kFlagReverseThreads (1 << 1)
#define kFlagFrequentFlush (1 << 2)
#define kFlagFrequentAbort (1 << 3) /* implies FrequentFlush */
#define kFlagUseTmp (1 << 4)
2000-05-23 01:55:31 +00:00
/*
* Globals.
*/
char gSentRecordWarning = false;
/*
* This gets called when a buffer DataSource is no longer needed.
*/
Distinguish Unicode and Mac OS Roman strings NufxLib has historically made no effort to distinguish between the character set used for filenames on the local disk, and for filenames stored within the archive. Now all Unicode filename strings use the UNICHAR type and have "UNI" in the name, and all Mac OS Roman strings have "MOR" in the name. (The naming convention makes it obvious when you're assigning the wrong thing; on Linux both formats are char*, so the compiler won't tell you if you get it wrong.) The distinction is necessary because filesystems generally support Unicode these days, but on Windows you need to use a separate set of wide-character file I/O functions. (On Linux it all works with "narrow" strings, and the UTF-8 encoding is interpreted by applications.) The character set used for NuFX archive filenames is MOR, matching what GS/OS + HFS supported, and we want to be able to convert back and forth between MOR and a Unicode representation. This change updates the various character types and string names, adds conversion functions, and updates NuLib2 for proper execution on Linux. It does not include the (probably extensive) changes required for Windows UTF-16 support. Instead, the conversion functions are no-ops, which should result in NuLib2 for Windows continuing to behave in the same slightly broken way. This adds "test-names", which exercises Unicode filenames a bit. It will not pass on Win32. Also, tweaked the Linux makefiles to have explicit dependencies, rather than empty space and an expectation that "makedepend" exists. Also, minor source code cleanups. While this probably doesn't affect binary compatibility -- it's mainly a matter of naming and string interpretation -- there's enough going on that it should be considered an API revision, so this updates the version to 3.0.0.
2014-12-24 19:14:32 +00:00
NuResult FreeCallback(NuArchive* pArchive, void* args)
{
free(args);
return kNuOK;
}
2000-05-23 01:55:31 +00:00
/*
* Copy a thread, expanding and recompressing it.
*
* This assumes the library is configured for compression (it defaults
* to LZW/2, so this is a reasonable assumption).
*/
Distinguish Unicode and Mac OS Roman strings NufxLib has historically made no effort to distinguish between the character set used for filenames on the local disk, and for filenames stored within the archive. Now all Unicode filename strings use the UNICHAR type and have "UNI" in the name, and all Mac OS Roman strings have "MOR" in the name. (The naming convention makes it obvious when you're assigning the wrong thing; on Linux both formats are char*, so the compiler won't tell you if you get it wrong.) The distinction is necessary because filesystems generally support Unicode these days, but on Windows you need to use a separate set of wide-character file I/O functions. (On Linux it all works with "narrow" strings, and the UTF-8 encoding is interpreted by applications.) The character set used for NuFX archive filenames is MOR, matching what GS/OS + HFS supported, and we want to be able to convert back and forth between MOR and a Unicode representation. This change updates the various character types and string names, adds conversion functions, and updates NuLib2 for proper execution on Linux. It does not include the (probably extensive) changes required for Windows UTF-16 support. Instead, the conversion functions are no-ops, which should result in NuLib2 for Windows continuing to behave in the same slightly broken way. This adds "test-names", which exercises Unicode filenames a bit. It will not pass on Win32. Also, tweaked the Linux makefiles to have explicit dependencies, rather than empty space and an expectation that "makedepend" exists. Also, minor source code cleanups. While this probably doesn't affect binary compatibility -- it's mainly a matter of naming and string interpretation -- there's enough going on that it should be considered an API revision, so this updates the version to 3.0.0.
2014-12-24 19:14:32 +00:00
NuError CopyThreadRecompressed(NuArchive* pInArchive, NuArchive* pOutArchive,
long flags, const NuThread* pThread, long newRecordIdx)
2000-05-23 01:55:31 +00:00
{
NuError err = kNuErrNone;
2014-12-22 02:17:23 +00:00
NuDataSource* pDataSource = NULL;
NuDataSink* pDataSink = NULL;
uint8_t* buffer = NULL;
/*
* Allocate a buffer large enough to hold all the uncompressed data, and
* wrap a data sink around it.
*
* If the thread is zero bytes long, we can skip this part.
*/
if (pThread->actualThreadEOF) {
buffer = malloc(pThread->actualThreadEOF);
2014-12-22 02:17:23 +00:00
if (buffer == NULL) {
err = kNuErrMalloc;
goto bail;
}
err = NuCreateDataSinkForBuffer(true, kNuConvertOff, buffer,
pThread->actualThreadEOF, &pDataSink);
if (err != kNuErrNone) {
fprintf(stderr, "ERROR: unable to create data sink (err=%d)\n",
err);
goto bail;
}
/*
* Expand the data. For a pre-sized thread, this grabs only the
* interesting part of the buffer.
*/
err = NuExtractThread(pInArchive, pThread->threadIdx, pDataSink);
if (err != kNuErrNone) {
fprintf(stderr, "ERROR: unable to extract thread %u (err=%d)\n",
pThread->threadIdx, err);
goto bail;
}
}
/*
* The expanded data is in the buffer, now create a data source that
* describes it.
*
* This is complicated by the existence of pre-sized threads, which
* require us to set "otherLen".
*
* We always use "actualThreadEOF" because "thThreadEOF" is broken
* for disk archives created by certain versions of ShrinkIt.
*
2014-12-22 02:17:23 +00:00
* It's okay to pass in a NULL value for "buffer", so long as the
* amount of data in the buffer is also zero. The library will do
* the right thing.
*/
if (NuIsPresizedThreadID(NuGetThreadID(pThread))) {
err = NuCreateDataSourceForBuffer(kNuThreadFormatUncompressed,
pThread->thCompThreadEOF, buffer, 0,
pThread->actualThreadEOF, FreeCallback, &pDataSource);
if (err != kNuErrNone) {
fprintf(stderr,
"ERROR: unable to create pre-sized data source (err=%d)\n",err);
goto bail;
}
} else {
err = NuCreateDataSourceForBuffer(kNuThreadFormatUncompressed,
0, buffer, 0, pThread->actualThreadEOF,
FreeCallback, &pDataSource);
if (err != kNuErrNone) {
fprintf(stderr,
"ERROR: unable to create data source (err=%d)\n", err);
goto bail;
}
}
2014-12-22 02:17:23 +00:00
buffer = NULL; /* doClose was set, so it's owned by the data source */
/*
* Schedule the data for addition to the record.
*/
err = NuAddThread(pOutArchive, newRecordIdx, NuGetThreadID(pThread),
2014-12-22 02:17:23 +00:00
pDataSource, NULL);
if (err != kNuErrNone) {
fprintf(stderr, "ERROR: unable to add thread (err=%d)\n", err);
goto bail;
}
2014-12-22 02:17:23 +00:00
pDataSource = NULL; /* library owns it now */
2000-05-23 01:55:31 +00:00
bail:
2014-12-22 02:17:23 +00:00
if (pDataSource != NULL)
NuFreeDataSource(pDataSource);
2014-12-22 02:17:23 +00:00
if (pDataSink != NULL)
NuFreeDataSink(pDataSink);
2014-12-22 02:17:23 +00:00
if (buffer != NULL)
free(buffer);
return err;
2000-05-23 01:55:31 +00:00
}
/*
* Copy a thread from one archive to another without disturbing the
* compression.
*
* There is a much more efficient way to do this: create an FP
* data source using an offset within the archive file itself.
* Since pInArchive->archiveFp isn't exposed, we can't use that,
* but under most operating systems you aren't prevented from
* opening the same file twice in read-only mode. The file offset
* in pThread tells us where the data is.
*
* The method used below is less memory-efficient but more portable.
*
* This always extracts based on the compThreadEOF, which is
* reliable but extracts a little more than we need on pre-sized
* threads (filenames, comments).
*/
Distinguish Unicode and Mac OS Roman strings NufxLib has historically made no effort to distinguish between the character set used for filenames on the local disk, and for filenames stored within the archive. Now all Unicode filename strings use the UNICHAR type and have "UNI" in the name, and all Mac OS Roman strings have "MOR" in the name. (The naming convention makes it obvious when you're assigning the wrong thing; on Linux both formats are char*, so the compiler won't tell you if you get it wrong.) The distinction is necessary because filesystems generally support Unicode these days, but on Windows you need to use a separate set of wide-character file I/O functions. (On Linux it all works with "narrow" strings, and the UTF-8 encoding is interpreted by applications.) The character set used for NuFX archive filenames is MOR, matching what GS/OS + HFS supported, and we want to be able to convert back and forth between MOR and a Unicode representation. This change updates the various character types and string names, adds conversion functions, and updates NuLib2 for proper execution on Linux. It does not include the (probably extensive) changes required for Windows UTF-16 support. Instead, the conversion functions are no-ops, which should result in NuLib2 for Windows continuing to behave in the same slightly broken way. This adds "test-names", which exercises Unicode filenames a bit. It will not pass on Win32. Also, tweaked the Linux makefiles to have explicit dependencies, rather than empty space and an expectation that "makedepend" exists. Also, minor source code cleanups. While this probably doesn't affect binary compatibility -- it's mainly a matter of naming and string interpretation -- there's enough going on that it should be considered an API revision, so this updates the version to 3.0.0.
2014-12-24 19:14:32 +00:00
NuError CopyThreadUncompressed(NuArchive* pInArchive, NuArchive* pOutArchive,
long flags, const NuThread* pThread, long newRecordIdx)
2000-05-23 01:55:31 +00:00
{
NuError err = kNuErrNone;
2014-12-22 02:17:23 +00:00
NuDataSource* pDataSource = NULL;
NuDataSink* pDataSink = NULL;
uint8_t* buffer = NULL;
/*
* If we have some data files that were left uncompressed, perhaps
* because of GSHK's "don't compress anything smaller than 512 bytes"
* rule, NufxLib will try to compress them. We disable this
* behavior by disabling compression. That way, stuff that is
* already compressed will remain that way, and stuff that isn't
* compressed won't be. (We really only need to do this once, at
* the start of the program, but it's illustrative to do it here.)
2004-08-23 00:00:10 +00:00
*
* [ I don't understand this comment. It's necessary to disable
* compression, but I don't see why uncompressed files are
* special. ++ATM 20040821 ]
*/
err = NuSetValue(pOutArchive, kNuValueDataCompression, kNuCompressNone);
if (err != kNuErrNone) {
fprintf(stderr, "ERROR: unable to set compression (err=%d)\n", err);
goto bail;
}
/*
* Allocate a buffer large enough to hold all the compressed data, and
* wrap a data sink around it.
*/
buffer = malloc(pThread->thCompThreadEOF);
2014-12-22 02:17:23 +00:00
if (buffer == NULL) {
err = kNuErrMalloc;
goto bail;
}
err = NuCreateDataSinkForBuffer(false, kNuConvertOff, buffer,
pThread->thCompThreadEOF, &pDataSink);
if (err != kNuErrNone) {
fprintf(stderr, "ERROR: unable to create data sink (err=%d)\n",
err);
goto bail;
}
/*
* Get the compressed data. For a pre-sized thread, this grabs the
* entire contents of the buffer, including the padding.
*/
err = NuExtractThread(pInArchive, pThread->threadIdx, pDataSink);
if (err != kNuErrNone) {
fprintf(stderr, "ERROR: unable to extract thread %u (err=%d)\n",
pThread->threadIdx, err);
goto bail;
}
/*
* The (perhaps compressed) data is in the buffer, now create a data
* source that describes it.
*
* This is complicated by the existence of pre-sized threads. There
* are two possibilities:
* 1. We have a certain amount of non-pre-sized data (thCompThreadEOF)
* that will expand out to a certain length (actualThreadEOF).
* 2. We have a certain amount of pre-sized data (actualThreadEOF)
* that will fit within a buffer (thCompThreadEOF).
* As you can see, the arguments need to be reversed for pre-sized
* threads.
*
* We always use "actualThreadEOF" because "thThreadEOF" is broken
* for disk archives created by certain versions of ShrinkIt.
*/
if (NuIsPresizedThreadID(NuGetThreadID(pThread))) {
err = NuCreateDataSourceForBuffer(pThread->thThreadFormat,
pThread->thCompThreadEOF, buffer, 0,
pThread->actualThreadEOF, FreeCallback, &pDataSource);
if (err != kNuErrNone) {
fprintf(stderr,
"ERROR: unable to create pre-sized data source (err=%d)\n",err);
goto bail;
}
} else {
err = NuCreateDataSourceForBuffer(pThread->thThreadFormat,
pThread->actualThreadEOF, buffer, 0,
pThread->thCompThreadEOF, FreeCallback, &pDataSource);
if (err != kNuErrNone) {
fprintf(stderr,
"ERROR: unable to create data source (err=%d)\n", err);
goto bail;
}
}
2014-12-22 02:17:23 +00:00
buffer = NULL; /* doClose was set, so it's owned by the data source */
/* yes, this is a kluge... sigh */
err = NuDataSourceSetRawCrc(pDataSource, pThread->thThreadCRC);
if (err != kNuErrNone) {
fprintf(stderr, "ERROR: can't set source CRC (err=%d)\n", err);
goto bail;
}
/*
* Schedule the data for addition to the record.
*
* Note that NuAddThread makes a copy of the data source, and clears
* "doClose" on our copy, so we are free to dispose of pDataSource.
*/
err = NuAddThread(pOutArchive, newRecordIdx, NuGetThreadID(pThread),
2014-12-22 02:17:23 +00:00
pDataSource, NULL);
if (err != kNuErrNone) {
fprintf(stderr, "ERROR: unable to add thread (err=%d)\n", err);
goto bail;
}
2014-12-22 02:17:23 +00:00
pDataSource = NULL; /* library owns it now */
2000-05-23 01:55:31 +00:00
bail:
2014-12-22 02:17:23 +00:00
if (pDataSource != NULL)
NuFreeDataSource(pDataSource);
2014-12-22 02:17:23 +00:00
if (pDataSink != NULL)
NuFreeDataSink(pDataSink);
2014-12-22 02:17:23 +00:00
if (buffer != NULL)
free(buffer);
return err;
2000-05-23 01:55:31 +00:00
}
/*
* Copy a thread from one archive to another.
*
* Depending on "flags", this will either copy it raw or uncompress and
* recompress.
*/
Distinguish Unicode and Mac OS Roman strings NufxLib has historically made no effort to distinguish between the character set used for filenames on the local disk, and for filenames stored within the archive. Now all Unicode filename strings use the UNICHAR type and have "UNI" in the name, and all Mac OS Roman strings have "MOR" in the name. (The naming convention makes it obvious when you're assigning the wrong thing; on Linux both formats are char*, so the compiler won't tell you if you get it wrong.) The distinction is necessary because filesystems generally support Unicode these days, but on Windows you need to use a separate set of wide-character file I/O functions. (On Linux it all works with "narrow" strings, and the UTF-8 encoding is interpreted by applications.) The character set used for NuFX archive filenames is MOR, matching what GS/OS + HFS supported, and we want to be able to convert back and forth between MOR and a Unicode representation. This change updates the various character types and string names, adds conversion functions, and updates NuLib2 for proper execution on Linux. It does not include the (probably extensive) changes required for Windows UTF-16 support. Instead, the conversion functions are no-ops, which should result in NuLib2 for Windows continuing to behave in the same slightly broken way. This adds "test-names", which exercises Unicode filenames a bit. It will not pass on Win32. Also, tweaked the Linux makefiles to have explicit dependencies, rather than empty space and an expectation that "makedepend" exists. Also, minor source code cleanups. While this probably doesn't affect binary compatibility -- it's mainly a matter of naming and string interpretation -- there's enough going on that it should be considered an API revision, so this updates the version to 3.0.0.
2014-12-24 19:14:32 +00:00
NuError CopyThread(NuArchive* pInArchive, NuArchive* pOutArchive, long flags,
const NuThread* pThread, long newRecordIdx)
2000-05-23 01:55:31 +00:00
{
if (flags & kFlagCopyOnly) {
return CopyThreadUncompressed(pInArchive, pOutArchive, flags, pThread,
newRecordIdx);
} else {
return CopyThreadRecompressed(pInArchive, pOutArchive, flags, pThread,
newRecordIdx);
}
2000-05-23 01:55:31 +00:00
}
/*
* Copy a record from the input to the output.
*
* This runs through the list of threads and copies each one individually.
* It will copy them in the original order or in reverse order (the latter
* of which will not usually have any effect since NufxLib imposes a
* specific thread ordering on most common types) depending on "flags".
*/
Distinguish Unicode and Mac OS Roman strings NufxLib has historically made no effort to distinguish between the character set used for filenames on the local disk, and for filenames stored within the archive. Now all Unicode filename strings use the UNICHAR type and have "UNI" in the name, and all Mac OS Roman strings have "MOR" in the name. (The naming convention makes it obvious when you're assigning the wrong thing; on Linux both formats are char*, so the compiler won't tell you if you get it wrong.) The distinction is necessary because filesystems generally support Unicode these days, but on Windows you need to use a separate set of wide-character file I/O functions. (On Linux it all works with "narrow" strings, and the UTF-8 encoding is interpreted by applications.) The character set used for NuFX archive filenames is MOR, matching what GS/OS + HFS supported, and we want to be able to convert back and forth between MOR and a Unicode representation. This change updates the various character types and string names, adds conversion functions, and updates NuLib2 for proper execution on Linux. It does not include the (probably extensive) changes required for Windows UTF-16 support. Instead, the conversion functions are no-ops, which should result in NuLib2 for Windows continuing to behave in the same slightly broken way. This adds "test-names", which exercises Unicode filenames a bit. It will not pass on Win32. Also, tweaked the Linux makefiles to have explicit dependencies, rather than empty space and an expectation that "makedepend" exists. Also, minor source code cleanups. While this probably doesn't affect binary compatibility -- it's mainly a matter of naming and string interpretation -- there's enough going on that it should be considered an API revision, so this updates the version to 3.0.0.
2014-12-24 19:14:32 +00:00
NuError CopyRecord(NuArchive* pInArchive, NuArchive* pOutArchive, long flags,
NuRecordIdx recordIdx)
2000-05-23 01:55:31 +00:00
{
NuError err = kNuErrNone;
const NuRecord* pRecord;
const NuThread* pThread;
NuFileDetails fileDetails;
NuRecordIdx newRecordIdx;
long numThreads;
int idx;
/*
* Grab the original record and see how many threads it has.
*/
err = NuGetRecord(pInArchive, recordIdx, &pRecord);
if (err != kNuErrNone) {
fprintf(stderr, "ERROR: unable to get recordIdx %u\n", recordIdx);
goto bail;
}
/*
* Pre-v3 records didn't put CRCs in the thread headers. If we just
* copy the thread over without reprocessing the data, we won't compute
* a CRC for the thread, and we will get CRC failures.
*/
if (!gSentRecordWarning && (flags & kFlagCopyOnly) &&
pRecord->recVersionNumber < 3)
{
printf("WARNING: pre-v3 records that aren't recompressed may exhibit CRC failures\n");
gSentRecordWarning = true;
}
numThreads = NuRecordGetNumThreads(pRecord);
if (!numThreads) {
fprintf(stderr, "WARNING: recordIdx=%u was empty\n", recordIdx);
goto bail;
}
/*
* Create a new record that looks just like the original.
*/
memset(&fileDetails, 0, sizeof(fileDetails));
Distinguish Unicode and Mac OS Roman strings NufxLib has historically made no effort to distinguish between the character set used for filenames on the local disk, and for filenames stored within the archive. Now all Unicode filename strings use the UNICHAR type and have "UNI" in the name, and all Mac OS Roman strings have "MOR" in the name. (The naming convention makes it obvious when you're assigning the wrong thing; on Linux both formats are char*, so the compiler won't tell you if you get it wrong.) The distinction is necessary because filesystems generally support Unicode these days, but on Windows you need to use a separate set of wide-character file I/O functions. (On Linux it all works with "narrow" strings, and the UTF-8 encoding is interpreted by applications.) The character set used for NuFX archive filenames is MOR, matching what GS/OS + HFS supported, and we want to be able to convert back and forth between MOR and a Unicode representation. This change updates the various character types and string names, adds conversion functions, and updates NuLib2 for proper execution on Linux. It does not include the (probably extensive) changes required for Windows UTF-16 support. Instead, the conversion functions are no-ops, which should result in NuLib2 for Windows continuing to behave in the same slightly broken way. This adds "test-names", which exercises Unicode filenames a bit. It will not pass on Win32. Also, tweaked the Linux makefiles to have explicit dependencies, rather than empty space and an expectation that "makedepend" exists. Also, minor source code cleanups. While this probably doesn't affect binary compatibility -- it's mainly a matter of naming and string interpretation -- there's enough going on that it should be considered an API revision, so this updates the version to 3.0.0.
2014-12-24 19:14:32 +00:00
fileDetails.storageNameMOR = pRecord->filenameMOR;
fileDetails.fileSysID = pRecord->recFileSysID;
fileDetails.fileSysInfo = pRecord->recFileSysInfo;
fileDetails.access = pRecord->recAccess;
fileDetails.fileType = pRecord->recFileType;
fileDetails.extraType = pRecord->recExtraType;
fileDetails.storageType = pRecord->recStorageType;
fileDetails.createWhen = pRecord->recCreateWhen;
fileDetails.modWhen = pRecord->recModWhen;
fileDetails.archiveWhen = pRecord->recArchiveWhen;
err = NuAddRecord(pOutArchive, &fileDetails, &newRecordIdx);
if (err != kNuErrNone) {
fprintf(stderr, "ERROR: NuAddRecord failed (err=%d)\n", err);
goto bail;
}
/*
* Copy the threads.
*/
if (flags & kFlagReverseThreads) {
for (idx = numThreads-1; idx >= 0; idx--) {
pThread = NuGetThread(pRecord, idx);
2014-12-22 02:17:23 +00:00
assert(pThread != NULL);
err = CopyThread(pInArchive, pOutArchive, flags, pThread,
newRecordIdx);
if (err != kNuErrNone)
goto bail;
}
} else {
for (idx = 0; idx < numThreads; idx++) {
pThread = NuGetThread(pRecord, idx);
2014-12-22 02:17:23 +00:00
assert(pThread != NULL);
err = CopyThread(pInArchive, pOutArchive, flags, pThread,
newRecordIdx);
if (err != kNuErrNone)
goto bail;
}
}
2000-05-23 01:55:31 +00:00
bail:
return err;
2000-05-23 01:55:31 +00:00
}
/*
* Launder an archive from inFile to outFile.
*
* Returns 0 on success, nonzero on failure.
*/
Distinguish Unicode and Mac OS Roman strings NufxLib has historically made no effort to distinguish between the character set used for filenames on the local disk, and for filenames stored within the archive. Now all Unicode filename strings use the UNICHAR type and have "UNI" in the name, and all Mac OS Roman strings have "MOR" in the name. (The naming convention makes it obvious when you're assigning the wrong thing; on Linux both formats are char*, so the compiler won't tell you if you get it wrong.) The distinction is necessary because filesystems generally support Unicode these days, but on Windows you need to use a separate set of wide-character file I/O functions. (On Linux it all works with "narrow" strings, and the UTF-8 encoding is interpreted by applications.) The character set used for NuFX archive filenames is MOR, matching what GS/OS + HFS supported, and we want to be able to convert back and forth between MOR and a Unicode representation. This change updates the various character types and string names, adds conversion functions, and updates NuLib2 for proper execution on Linux. It does not include the (probably extensive) changes required for Windows UTF-16 support. Instead, the conversion functions are no-ops, which should result in NuLib2 for Windows continuing to behave in the same slightly broken way. This adds "test-names", which exercises Unicode filenames a bit. It will not pass on Win32. Also, tweaked the Linux makefiles to have explicit dependencies, rather than empty space and an expectation that "makedepend" exists. Also, minor source code cleanups. While this probably doesn't affect binary compatibility -- it's mainly a matter of naming and string interpretation -- there's enough going on that it should be considered an API revision, so this updates the version to 3.0.0.
2014-12-24 19:14:32 +00:00
int LaunderArchive(const char* inFile, const char* outFile,
NuValue compressMethod, long flags)
2000-05-23 01:55:31 +00:00
{
NuError err = kNuErrNone;
2014-12-22 02:17:23 +00:00
NuArchive* pInArchive = NULL;
NuArchive* pOutArchive = NULL;
const NuMasterHeader* pMasterHeader;
NuRecordIdx recordIdx;
Distinguish Unicode and Mac OS Roman strings NufxLib has historically made no effort to distinguish between the character set used for filenames on the local disk, and for filenames stored within the archive. Now all Unicode filename strings use the UNICHAR type and have "UNI" in the name, and all Mac OS Roman strings have "MOR" in the name. (The naming convention makes it obvious when you're assigning the wrong thing; on Linux both formats are char*, so the compiler won't tell you if you get it wrong.) The distinction is necessary because filesystems generally support Unicode these days, but on Windows you need to use a separate set of wide-character file I/O functions. (On Linux it all works with "narrow" strings, and the UTF-8 encoding is interpreted by applications.) The character set used for NuFX archive filenames is MOR, matching what GS/OS + HFS supported, and we want to be able to convert back and forth between MOR and a Unicode representation. This change updates the various character types and string names, adds conversion functions, and updates NuLib2 for proper execution on Linux. It does not include the (probably extensive) changes required for Windows UTF-16 support. Instead, the conversion functions are no-ops, which should result in NuLib2 for Windows continuing to behave in the same slightly broken way. This adds "test-names", which exercises Unicode filenames a bit. It will not pass on Win32. Also, tweaked the Linux makefiles to have explicit dependencies, rather than empty space and an expectation that "makedepend" exists. Also, minor source code cleanups. While this probably doesn't affect binary compatibility -- it's mainly a matter of naming and string interpretation -- there's enough going on that it should be considered an API revision, so this updates the version to 3.0.0.
2014-12-24 19:14:32 +00:00
uint32_t idx, flushStatus;
err = NuOpenRO(inFile, &pInArchive);
if (err != kNuErrNone) {
fprintf(stderr, "ERROR: couldn't open input archive '%s' (err=%d)\n",
inFile, err);
goto bail;
}
err = NuOpenRW(outFile, kTempFile, kNuOpenCreat|kNuOpenExcl, &pOutArchive);
if (err != kNuErrNone) {
fprintf(stderr, "ERROR: couldn't open output archive '%s' (err=%d)\n",
outFile, err);
goto bail;
}
2004-08-23 00:00:10 +00:00
/* turn off "mimic GSHK" */
err = NuSetValue(pInArchive, kNuValueMimicSHK, true);
if (err != kNuErrNone) {
fprintf(stderr, "ERROR: unable to disable GSHK quirks (err=%d)\n",
err);
goto bail;
}
/* allow duplicates, in case the original archive has them */
err = NuSetValue(pOutArchive, kNuValueAllowDuplicates, true);
if (err != kNuErrNone) {
fprintf(stderr, "ERROR: couldn't allow duplicates (err=%d)\n", err);
goto bail;
}
/* set the compression method */
err = NuSetValue(pOutArchive, kNuValueDataCompression, compressMethod);
if (err != kNuErrNone) {
fprintf(stderr, "ERROR: unable to set compression (err=%d)\n",
err);
goto bail;
}
if (flags & kFlagUseTmp) {
err = NuSetValue(pOutArchive, kNuValueModifyOrig, false);
if (err != kNuErrNone) {
fprintf(stderr,
"ERROR: couldn't disable modify orig (err=%d)\n", err);
goto bail;
}
}
err = NuGetMasterHeader(pInArchive, &pMasterHeader);
if (err != kNuErrNone) {
fprintf(stderr, "ERROR: couldn't get master header (err=%d)\n", err);
goto bail;
}
/*
* Iterate through the set of records.
*/
Distinguish Unicode and Mac OS Roman strings NufxLib has historically made no effort to distinguish between the character set used for filenames on the local disk, and for filenames stored within the archive. Now all Unicode filename strings use the UNICHAR type and have "UNI" in the name, and all Mac OS Roman strings have "MOR" in the name. (The naming convention makes it obvious when you're assigning the wrong thing; on Linux both formats are char*, so the compiler won't tell you if you get it wrong.) The distinction is necessary because filesystems generally support Unicode these days, but on Windows you need to use a separate set of wide-character file I/O functions. (On Linux it all works with "narrow" strings, and the UTF-8 encoding is interpreted by applications.) The character set used for NuFX archive filenames is MOR, matching what GS/OS + HFS supported, and we want to be able to convert back and forth between MOR and a Unicode representation. This change updates the various character types and string names, adds conversion functions, and updates NuLib2 for proper execution on Linux. It does not include the (probably extensive) changes required for Windows UTF-16 support. Instead, the conversion functions are no-ops, which should result in NuLib2 for Windows continuing to behave in the same slightly broken way. This adds "test-names", which exercises Unicode filenames a bit. It will not pass on Win32. Also, tweaked the Linux makefiles to have explicit dependencies, rather than empty space and an expectation that "makedepend" exists. Also, minor source code cleanups. While this probably doesn't affect binary compatibility -- it's mainly a matter of naming and string interpretation -- there's enough going on that it should be considered an API revision, so this updates the version to 3.0.0.
2014-12-24 19:14:32 +00:00
for (idx = 0; idx < pMasterHeader->mhTotalRecords; idx++) {
err = NuGetRecordIdxByPosition(pInArchive, idx, &recordIdx);
if (err != kNuErrNone) {
Distinguish Unicode and Mac OS Roman strings NufxLib has historically made no effort to distinguish between the character set used for filenames on the local disk, and for filenames stored within the archive. Now all Unicode filename strings use the UNICHAR type and have "UNI" in the name, and all Mac OS Roman strings have "MOR" in the name. (The naming convention makes it obvious when you're assigning the wrong thing; on Linux both formats are char*, so the compiler won't tell you if you get it wrong.) The distinction is necessary because filesystems generally support Unicode these days, but on Windows you need to use a separate set of wide-character file I/O functions. (On Linux it all works with "narrow" strings, and the UTF-8 encoding is interpreted by applications.) The character set used for NuFX archive filenames is MOR, matching what GS/OS + HFS supported, and we want to be able to convert back and forth between MOR and a Unicode representation. This change updates the various character types and string names, adds conversion functions, and updates NuLib2 for proper execution on Linux. It does not include the (probably extensive) changes required for Windows UTF-16 support. Instead, the conversion functions are no-ops, which should result in NuLib2 for Windows continuing to behave in the same slightly broken way. This adds "test-names", which exercises Unicode filenames a bit. It will not pass on Win32. Also, tweaked the Linux makefiles to have explicit dependencies, rather than empty space and an expectation that "makedepend" exists. Also, minor source code cleanups. While this probably doesn't affect binary compatibility -- it's mainly a matter of naming and string interpretation -- there's enough going on that it should be considered an API revision, so this updates the version to 3.0.0.
2014-12-24 19:14:32 +00:00
fprintf(stderr, "ERROR: couldn't get record #%u (err=%d)\n",
idx, err);
goto bail;
}
err = CopyRecord(pInArchive, pOutArchive, flags, recordIdx);
if (err != kNuErrNone)
goto bail;
/*
* If "frequent abort" is set, abort what we just did and redo it.
*/
if (flags & kFlagFrequentAbort) {
/*printf("(abort)\n");*/
err = NuAbort(pOutArchive);
if (err != kNuErrNone) {
fprintf(stderr, "ERROR: abort failed (err=%d)\n", err);
goto bail;
}
err = CopyRecord(pInArchive, pOutArchive, flags, recordIdx);
if (err != kNuErrNone)
goto bail;
}
/*
* If "frequent abort" or "frequent flush" is set, flush after
* each record is copied.
*/
if ((flags & kFlagFrequentAbort) || (flags & kFlagFrequentFlush)) {
/*printf("(flush)\n");*/
err = NuFlush(pOutArchive, &flushStatus);
if (err != kNuErrNone) {
fprintf(stderr,
Distinguish Unicode and Mac OS Roman strings NufxLib has historically made no effort to distinguish between the character set used for filenames on the local disk, and for filenames stored within the archive. Now all Unicode filename strings use the UNICHAR type and have "UNI" in the name, and all Mac OS Roman strings have "MOR" in the name. (The naming convention makes it obvious when you're assigning the wrong thing; on Linux both formats are char*, so the compiler won't tell you if you get it wrong.) The distinction is necessary because filesystems generally support Unicode these days, but on Windows you need to use a separate set of wide-character file I/O functions. (On Linux it all works with "narrow" strings, and the UTF-8 encoding is interpreted by applications.) The character set used for NuFX archive filenames is MOR, matching what GS/OS + HFS supported, and we want to be able to convert back and forth between MOR and a Unicode representation. This change updates the various character types and string names, adds conversion functions, and updates NuLib2 for proper execution on Linux. It does not include the (probably extensive) changes required for Windows UTF-16 support. Instead, the conversion functions are no-ops, which should result in NuLib2 for Windows continuing to behave in the same slightly broken way. This adds "test-names", which exercises Unicode filenames a bit. It will not pass on Win32. Also, tweaked the Linux makefiles to have explicit dependencies, rather than empty space and an expectation that "makedepend" exists. Also, minor source code cleanups. While this probably doesn't affect binary compatibility -- it's mainly a matter of naming and string interpretation -- there's enough going on that it should be considered an API revision, so this updates the version to 3.0.0.
2014-12-24 19:14:32 +00:00
"ERROR: flush failed (err=%d, status=0x%04x)\n",
err, flushStatus);
goto bail;
}
}
}
/* first and only flush if frequent-flushing wasn't enabled */
err = NuFlush(pOutArchive, &flushStatus);
if (err != kNuErrNone) {
Distinguish Unicode and Mac OS Roman strings NufxLib has historically made no effort to distinguish between the character set used for filenames on the local disk, and for filenames stored within the archive. Now all Unicode filename strings use the UNICHAR type and have "UNI" in the name, and all Mac OS Roman strings have "MOR" in the name. (The naming convention makes it obvious when you're assigning the wrong thing; on Linux both formats are char*, so the compiler won't tell you if you get it wrong.) The distinction is necessary because filesystems generally support Unicode these days, but on Windows you need to use a separate set of wide-character file I/O functions. (On Linux it all works with "narrow" strings, and the UTF-8 encoding is interpreted by applications.) The character set used for NuFX archive filenames is MOR, matching what GS/OS + HFS supported, and we want to be able to convert back and forth between MOR and a Unicode representation. This change updates the various character types and string names, adds conversion functions, and updates NuLib2 for proper execution on Linux. It does not include the (probably extensive) changes required for Windows UTF-16 support. Instead, the conversion functions are no-ops, which should result in NuLib2 for Windows continuing to behave in the same slightly broken way. This adds "test-names", which exercises Unicode filenames a bit. It will not pass on Win32. Also, tweaked the Linux makefiles to have explicit dependencies, rather than empty space and an expectation that "makedepend" exists. Also, minor source code cleanups. While this probably doesn't affect binary compatibility -- it's mainly a matter of naming and string interpretation -- there's enough going on that it should be considered an API revision, so this updates the version to 3.0.0.
2014-12-24 19:14:32 +00:00
fprintf(stderr, "ERROR: flush failed (err=%d, status=0x%04x)\n",
err, flushStatus);
goto bail;
}
2000-05-23 01:55:31 +00:00
bail:
2014-12-22 02:17:23 +00:00
if (pInArchive != NULL)
NuClose(pInArchive);
2014-12-22 02:17:23 +00:00
if (pOutArchive != NULL) {
if (err != kNuErrNone)
NuAbort(pOutArchive);
NuClose(pOutArchive); /* flush pending changes and close */
}
return (err != kNuErrNone);
2000-05-23 01:55:31 +00:00
}
/*
* Can't count on having getopt on non-UNIX platforms, so just use this
* quick version instead. May not work exactly like getopt(), but it
* does everything we need here.
*/
int myoptind = 0;
2014-12-22 02:17:23 +00:00
char* myoptarg = NULL;
const char* curchar = NULL;
int skipnext = false;
Distinguish Unicode and Mac OS Roman strings NufxLib has historically made no effort to distinguish between the character set used for filenames on the local disk, and for filenames stored within the archive. Now all Unicode filename strings use the UNICHAR type and have "UNI" in the name, and all Mac OS Roman strings have "MOR" in the name. (The naming convention makes it obvious when you're assigning the wrong thing; on Linux both formats are char*, so the compiler won't tell you if you get it wrong.) The distinction is necessary because filesystems generally support Unicode these days, but on Windows you need to use a separate set of wide-character file I/O functions. (On Linux it all works with "narrow" strings, and the UTF-8 encoding is interpreted by applications.) The character set used for NuFX archive filenames is MOR, matching what GS/OS + HFS supported, and we want to be able to convert back and forth between MOR and a Unicode representation. This change updates the various character types and string names, adds conversion functions, and updates NuLib2 for proper execution on Linux. It does not include the (probably extensive) changes required for Windows UTF-16 support. Instead, the conversion functions are no-ops, which should result in NuLib2 for Windows continuing to behave in the same slightly broken way. This adds "test-names", which exercises Unicode filenames a bit. It will not pass on Win32. Also, tweaked the Linux makefiles to have explicit dependencies, rather than empty space and an expectation that "makedepend" exists. Also, minor source code cleanups. While this probably doesn't affect binary compatibility -- it's mainly a matter of naming and string interpretation -- there's enough going on that it should be considered an API revision, so this updates the version to 3.0.0.
2014-12-24 19:14:32 +00:00
int mygetopt(int argc, char** argv, const char* optstr)
{
if (!myoptind) {
myoptind = 1;
if (argc <= 1)
return EOF;
curchar = argv[myoptind];
if (*curchar != '-')
return EOF;
}
curchar++;
if (*curchar == '\0') {
myoptind++;
if (skipnext)
myoptind++;
if (myoptind >= argc)
return EOF;
curchar = argv[myoptind];
if (*curchar != '-')
return EOF;
curchar++;
}
while (*optstr != '\0') {
if (*optstr == *curchar) {
/*printf("MATCHED '%c'\n", *optstr);*/
if (*(optstr+1) == ':') {
skipnext = true;
myoptarg = argv[myoptind+1];
/*printf("ATTACHED '%s'\n", myoptarg);*/
}
return *curchar;
}
optstr++;
}
fprintf(stderr, "Unrecognized option '%c'\n", *curchar);
return *curchar;
}
2000-05-23 01:55:31 +00:00
/*
* Print usage info.
*/
Distinguish Unicode and Mac OS Roman strings NufxLib has historically made no effort to distinguish between the character set used for filenames on the local disk, and for filenames stored within the archive. Now all Unicode filename strings use the UNICHAR type and have "UNI" in the name, and all Mac OS Roman strings have "MOR" in the name. (The naming convention makes it obvious when you're assigning the wrong thing; on Linux both formats are char*, so the compiler won't tell you if you get it wrong.) The distinction is necessary because filesystems generally support Unicode these days, but on Windows you need to use a separate set of wide-character file I/O functions. (On Linux it all works with "narrow" strings, and the UTF-8 encoding is interpreted by applications.) The character set used for NuFX archive filenames is MOR, matching what GS/OS + HFS supported, and we want to be able to convert back and forth between MOR and a Unicode representation. This change updates the various character types and string names, adds conversion functions, and updates NuLib2 for proper execution on Linux. It does not include the (probably extensive) changes required for Windows UTF-16 support. Instead, the conversion functions are no-ops, which should result in NuLib2 for Windows continuing to behave in the same slightly broken way. This adds "test-names", which exercises Unicode filenames a bit. It will not pass on Win32. Also, tweaked the Linux makefiles to have explicit dependencies, rather than empty space and an expectation that "makedepend" exists. Also, minor source code cleanups. While this probably doesn't affect binary compatibility -- it's mainly a matter of naming and string interpretation -- there's enough going on that it should be considered an API revision, so this updates the version to 3.0.0.
2014-12-24 19:14:32 +00:00
void Usage(const char* argv0)
2000-05-23 01:55:31 +00:00
{
fprintf(stderr, "Usage: %s [-crfat] [-m method] infile.shk outfile.shk\n",
argv0);
fprintf(stderr, "\t-c : copy only, does not recompress data\n");
fprintf(stderr, "\t-r : copy threads in reverse order to test ordering\n");
fprintf(stderr, "\t-f : call Flush frequently to reduce memory usage\n");
fprintf(stderr, "\t-a : exercise nufxlib Abort code frequently\n");
2002-10-10 16:01:14 +00:00
fprintf(stderr, "\t-t : write to temp file instead of directly to outfile.shk\n");
fprintf(stderr,
"\t[method] is one of {sq,lzw1,lzw2,lzc12,lzc16,deflate,bzip2}\n");
fprintf(stderr, "\tIf not specified, method defaults to lzw2\n");
2000-05-23 01:55:31 +00:00
}
/*
* Grab the name of an archive to read.
*/
Distinguish Unicode and Mac OS Roman strings NufxLib has historically made no effort to distinguish between the character set used for filenames on the local disk, and for filenames stored within the archive. Now all Unicode filename strings use the UNICHAR type and have "UNI" in the name, and all Mac OS Roman strings have "MOR" in the name. (The naming convention makes it obvious when you're assigning the wrong thing; on Linux both formats are char*, so the compiler won't tell you if you get it wrong.) The distinction is necessary because filesystems generally support Unicode these days, but on Windows you need to use a separate set of wide-character file I/O functions. (On Linux it all works with "narrow" strings, and the UTF-8 encoding is interpreted by applications.) The character set used for NuFX archive filenames is MOR, matching what GS/OS + HFS supported, and we want to be able to convert back and forth between MOR and a Unicode representation. This change updates the various character types and string names, adds conversion functions, and updates NuLib2 for proper execution on Linux. It does not include the (probably extensive) changes required for Windows UTF-16 support. Instead, the conversion functions are no-ops, which should result in NuLib2 for Windows continuing to behave in the same slightly broken way. This adds "test-names", which exercises Unicode filenames a bit. It will not pass on Win32. Also, tweaked the Linux makefiles to have explicit dependencies, rather than empty space and an expectation that "makedepend" exists. Also, minor source code cleanups. While this probably doesn't affect binary compatibility -- it's mainly a matter of naming and string interpretation -- there's enough going on that it should be considered an API revision, so this updates the version to 3.0.0.
2014-12-24 19:14:32 +00:00
int main(int argc, char** argv)
2000-05-23 01:55:31 +00:00
{
NuValue compressMethod = kNuCompressLZW2;
Distinguish Unicode and Mac OS Roman strings NufxLib has historically made no effort to distinguish between the character set used for filenames on the local disk, and for filenames stored within the archive. Now all Unicode filename strings use the UNICHAR type and have "UNI" in the name, and all Mac OS Roman strings have "MOR" in the name. (The naming convention makes it obvious when you're assigning the wrong thing; on Linux both formats are char*, so the compiler won't tell you if you get it wrong.) The distinction is necessary because filesystems generally support Unicode these days, but on Windows you need to use a separate set of wide-character file I/O functions. (On Linux it all works with "narrow" strings, and the UTF-8 encoding is interpreted by applications.) The character set used for NuFX archive filenames is MOR, matching what GS/OS + HFS supported, and we want to be able to convert back and forth between MOR and a Unicode representation. This change updates the various character types and string names, adds conversion functions, and updates NuLib2 for proper execution on Linux. It does not include the (probably extensive) changes required for Windows UTF-16 support. Instead, the conversion functions are no-ops, which should result in NuLib2 for Windows continuing to behave in the same slightly broken way. This adds "test-names", which exercises Unicode filenames a bit. It will not pass on Win32. Also, tweaked the Linux makefiles to have explicit dependencies, rather than empty space and an expectation that "makedepend" exists. Also, minor source code cleanups. While this probably doesn't affect binary compatibility -- it's mainly a matter of naming and string interpretation -- there's enough going on that it should be considered an API revision, so this updates the version to 3.0.0.
2014-12-24 19:14:32 +00:00
int32_t major, minor, bug;
const char* pBuildDate;
long flags = 0;
int errorFlag;
int ic;
int cc;
2014-12-22 02:17:23 +00:00
(void) NuGetVersion(&major, &minor, &bug, &pBuildDate, NULL);
Distinguish Unicode and Mac OS Roman strings NufxLib has historically made no effort to distinguish between the character set used for filenames on the local disk, and for filenames stored within the archive. Now all Unicode filename strings use the UNICHAR type and have "UNI" in the name, and all Mac OS Roman strings have "MOR" in the name. (The naming convention makes it obvious when you're assigning the wrong thing; on Linux both formats are char*, so the compiler won't tell you if you get it wrong.) The distinction is necessary because filesystems generally support Unicode these days, but on Windows you need to use a separate set of wide-character file I/O functions. (On Linux it all works with "narrow" strings, and the UTF-8 encoding is interpreted by applications.) The character set used for NuFX archive filenames is MOR, matching what GS/OS + HFS supported, and we want to be able to convert back and forth between MOR and a Unicode representation. This change updates the various character types and string names, adds conversion functions, and updates NuLib2 for proper execution on Linux. It does not include the (probably extensive) changes required for Windows UTF-16 support. Instead, the conversion functions are no-ops, which should result in NuLib2 for Windows continuing to behave in the same slightly broken way. This adds "test-names", which exercises Unicode filenames a bit. It will not pass on Win32. Also, tweaked the Linux makefiles to have explicit dependencies, rather than empty space and an expectation that "makedepend" exists. Also, minor source code cleanups. While this probably doesn't affect binary compatibility -- it's mainly a matter of naming and string interpretation -- there's enough going on that it should be considered an API revision, so this updates the version to 3.0.0.
2014-12-24 19:14:32 +00:00
printf("Using NuFX lib %d.%d.%d built on or after %s\n",
major, minor, bug, pBuildDate);
errorFlag = false;
while ((ic = mygetopt(argc, argv, "crfatm:")) != EOF) {
switch (ic) {
case 'c': flags |= kFlagCopyOnly; break;
case 'r': flags |= kFlagReverseThreads; break;
case 'f': flags |= kFlagFrequentFlush; break;
case 'a': flags |= kFlagFrequentAbort; break;
case 't': flags |= kFlagUseTmp; break;
case 'm':
{
struct {
const char* str;
NuValue val;
NuFeature feature;
} methods[] = {
{ "sq", kNuCompressSQ, kNuFeatureCompressSQ },
{ "lzw1", kNuCompressLZW1, kNuFeatureCompressLZW },
{ "lzw2", kNuCompressLZW2, kNuFeatureCompressLZW },
{ "lzc12", kNuCompressLZC12, kNuFeatureCompressLZC },
{ "lzc16", kNuCompressLZC16, kNuFeatureCompressLZC },
{ "deflate", kNuCompressDeflate, kNuFeatureCompressDeflate},
{ "bzip2", kNuCompressBzip2, kNuFeatureCompressBzip2 },
};
char* methodStr = myoptarg;
int i;
for (i = 0; i < NELEM(methods); i++) {
if (strcmp(methods[i].str, methodStr) == 0) {
compressMethod = methods[i].val;
break;
}
}
if (i == NELEM(methods)) {
fprintf(stderr, "ERROR: unknown method '%s'\n", methodStr);
errorFlag++;
break;
}
if (NuTestFeature(methods[i].feature) != kNuErrNone) {
fprintf(stderr,
"ERROR: compression method '%s' not supported\n",
methodStr);
errorFlag++;
break;
}
}
break;
default:
errorFlag++;
break;
}
}
if (errorFlag || argc != myoptind+2) {
Usage(argv[0]);
exit(2);
}
cc = LaunderArchive(argv[myoptind], argv[myoptind+1], compressMethod,flags);
if (cc == 0)
printf("Success!\n");
else
printf("Failed.\n");
exit(cc != 0);
2000-05-23 01:55:31 +00:00
}