ciderpress/reformat/Asm.cpp

/*
 * CiderPress
 * Copyright (C) 2007 by faddenSoft, LLC.  All Rights Reserved.
 * See the file LICENSE for distribution terms.
 */
/*
 * Convert assembly source code.
 *
 * S-C Assembler, LISA, and Merlin-8 are handled here.  Others, such as
 * Orca/M, are either plain text or close enough that the "converted text"
 * code handles it well enough.
 */
#include "StdAfx.h"
#include "Asm.h"


/*
 * ===========================================================================
 *		S-C Assembler
 * ===========================================================================
 */

/*
 * S-C Assembler file format (thanks to Paul Schlyter, pausch at saaf.se):
 *
 *	<16-bit file length>  [DOS 3.3 only]
 *	<line> ...
 *
 * Each line consists of:
 *	<8-bit line length>
 *	<16-bit line number>
 *	<characters> ...
 *	<end-of-line token ($00)>
 *
 * Characters may be:
 *	$00-$1f: invalid
 *	$20-$7f: literal character
 *	$80-$bf: compressed spaces (0 to 63 count)
 *	$c0    : RLE token ($c0 <n> <ch> == repeat <ch> for <n> times)
 *	$c1-$ff: invalid
 *
 * There is no end-of-file marker.
 */

/*
 * Decide whether or not we want to handle this file.
 */
void
ReformatSCAssem::Examine(ReformatHolder* pHolder)
{
	if (pHolder->GetFileType() == kTypeINT && pHolder->GetAuxType() == 0) {
		if (ReformatSCAssem::IsSCAssem(pHolder)) {
			/* definitely S-C assembler */
			pHolder->SetApplic(ReformatHolder::kReformatSCAssem,
				ReformatHolder::kApplicYes,
				ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
		} else {
			/* possibly S-C assembler */
			pHolder->SetApplic(ReformatHolder::kReformatSCAssem,
				ReformatHolder::kApplicMaybe,
				ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
		}
	} else {
		/* not S-C assembler */
		pHolder->SetApplic(ReformatHolder::kReformatSCAssem,
			ReformatHolder::kApplicNot,
			ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
	}
}

/*
 * Figure out if a type 'I' file is an Integer BASIC program or an S-C
 * assembler listing.
 *
 * They both have a line length and line number, but use different conventions
 * for marking the end of a line, and have different sets of valid chars.  We
 * don't need to fully validate the file, just test the first line.
 */
/*static*/ bool
ReformatSCAssem::IsSCAssem(const ReformatHolder* pHolder)
{
	const unsigned char* ptr = pHolder->GetSourceBuf(ReformatHolder::kPartData);
	long srcLen = pHolder->GetSourceLen(ReformatHolder::kPartData);
	int len;

	len = *ptr;
	if (len == 0 || len > srcLen)
		return false;		// should return an error, really
	if (ptr[len-1] == 0x00) {
		WMSG0("  Found 0x00, looks like S-C assembler\n");
		return true;
	} else if (ptr[len-1] == 0x01) {
		WMSG0("  Found 0x01, looks like Integer BASIC\n");
		return false;
	} else {
		WMSG1("  Got strange value 0x%02x during S-C test\n", ptr[len-1]);
		return false;		// again, should return an error
	}
}


/*
 * Reformat an S-C Assembler listing into text.  I don't know exactly what the
 * original listings looked like, so I'm just doing what A2FID.C does.
 */
int
ReformatSCAssem::Process(const ReformatHolder* pHolder,
	ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
	ReformatOutput* pOutput)
{
	const unsigned char* srcPtr = pHolder->GetSourceBuf(part);
	long srcLen = pHolder->GetSourceLen(part);
	long length = srcLen;
	// (this was written before tab stuff in ReformatAsm class existed)
	static const char* kSpaces64 =  "                                "
									"                                ";
	int retval = -1;

	fUseRTF = false;

	RTFBegin();

	/*
	 * Make sure there's enough here to get started.  We want to return an
	 * "okay" result because we want this treated like a reformatted empty
	 * BASIC program rather than a non-Integer file.
	 */
	if (length < 2) {
		WMSG0("  SCAssem truncated?\n");
		BufPrintf("\r\n");
		goto done;
	}

	while (length > 0) {
		unsigned char lineLen;
		unsigned short lineNum;

		/* pull the length byte, which we sanity-check */
		lineLen = *srcPtr++;
		length--;
		if (lineLen == 0) {
			WMSG0("  SCAssem found zero-length line?\n");
			break;
		}

		/* line number */
		lineNum = Read16(&srcPtr, &length);
		BufPrintf("%04u ", lineNum);

		while (*srcPtr != 0x00 && length > 0) {
			if (*srcPtr >= 0x20 && *srcPtr <= 0x7f) {
				BufPrintf("%c", *srcPtr);
			} else if (*srcPtr >= 0x80 && *srcPtr <= 0xbf) {
				BufPrintf("%s", kSpaces64 + (64+128 - *srcPtr));
			} else if (*srcPtr == 0xc0) {
				if (length > 2) {
					int count = *(srcPtr+1);
					unsigned char ch = *(srcPtr+2);

					srcPtr += 2;
					length -= 2;
					while (count--)
						BufPrintf("%c", ch);
				} else {
					WMSG1("  SCAssem GLITCH: RLE but only %d chars left\n",
						length);
					BufPrintf("?!?");
				}
			} else {
				WMSG1("  SCAssem invalid char 0x%02x\n", *srcPtr);
				BufPrintf("?");
			}

			srcPtr++;
			length--;
		}

		/* skip past EOL token */
		ASSERT(*srcPtr == 0x00 || length <= 0);
		srcPtr++;
		length--;

		RTFNewPara();
	}

done:
	RTFEnd();

	SetResultBuffer(pOutput);
	retval = 0;

//bail:
	return retval;
}


/*
 * ===========================================================================
 *		Merlin 8 and Merlin 8/16 Assembler
 * ===========================================================================
 */

/*
 * Merlin source code uses ordinary text files that usually have names
 * ending in ".S".  They use high ASCII text -- unusual for ProDOS text
 * files -- with the occasional low-ASCII space character.
 *
 * We don't absolutely need this conversion, because the files are already
 * plain text, but it's easier to read when the various pieces are tabbed
 * to reasonable screen offsets.
 *
 * The 0xa0 values seem to be used to separate pieces, while the 0x20
 * values are used for comments and other filler.  It is entirely possible
 * to have a Merlin source file with no 0x20 values.
 */

/*
 * Decide whether or not we want to handle this file.  We know it's type
 * TXT, though the aux type can be almost anything.
 *
 * If we really just want Merlin we should probably exclude DOS disks,
 * since the text file contents will match.  However, it's probably useful
 * to support DOS ED/ASM sources with this.
 */
void
ReformatMerlin::Examine(ReformatHolder* pHolder)
{
	if (pHolder->GetFileType() == kTypeTXT) {
		bool isAsm = ReformatMerlin::IsMerlin(pHolder);
		bool isDotS = strcasecmp(pHolder->GetNameExt(), ".S") == 0;

		if (isAsm && isDotS) {
			/* gotta be */
			pHolder->SetApplic(ReformatHolder::kReformatMerlin,
				ReformatHolder::kApplicYes,
				ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
		} else if (isAsm) {
			/* probably Merlin assembler, or at least *some* sort of asm */
			pHolder->SetApplic(ReformatHolder::kReformatMerlin,
				ReformatHolder::kApplicProbably,
				ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
		} else if (isDotS) {
			/* not likely, but offer it as non-default option */
			pHolder->SetApplic(ReformatHolder::kReformatMerlin,
				ReformatHolder::kApplicProbablyNot,
				ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
		} else {
			/* probably not Merlin, don't allow */
			pHolder->SetApplic(ReformatHolder::kReformatMerlin,
				ReformatHolder::kApplicNot,
				ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
		}
	} else {
		/* not S-C assembler */
		pHolder->SetApplic(ReformatHolder::kReformatMerlin,
			ReformatHolder::kApplicNot,
			ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
	}
}

/*
 * Figure out if the contents of this file match up with our expections
 * for Merlin source code.
 *
 * Specifically, does it use high ASCII and 0x20 exclusively, and does
 * it have a large number of lines that begin with a single space or the
 * comment token ('*')?
 *
 * Typical source files start with a space on 40-60% of lines, but "equates"
 * files and files that are substantially comments break the rule.
 *
 * This will also return "true" for DOS ED/ASM files.
 */
/*static*/ bool
ReformatMerlin::IsMerlin(const ReformatHolder* pHolder)
{
	const unsigned char* ptr = pHolder->GetSourceBuf(ReformatHolder::kPartData);
	long srcLen = pHolder->GetSourceLen(ReformatHolder::kPartData);

	bool isLineStart = true;
	int lineCount, spaceLineCount, commentLineCount;

	lineCount = spaceLineCount = commentLineCount = 0;
	while (srcLen--) {
		if ((*ptr & 0x80) == 0 && (*ptr != 0x20)) {
			WMSG1("  Merlin: not, found 0x%02x\n", *ptr);
			return false;
		}

		if (isLineStart) {
			lineCount++;

			if ((*ptr & 0x7f) == 0x20 && srcLen != 0 &&
				(*(ptr+1) & 0x7f) != 0x20)
				spaceLineCount++;
			if (*ptr == 0xaa)		// '*'
				commentLineCount++;
			isLineStart = false;
		}

		if (*ptr == 0x8d)
			isLineStart = true;

		ptr++;
	}

	if (!lineCount)
		return false;		// don't divide by zero

	WMSG1("  Merlin: found %d lines\n", lineCount);
	WMSG4("    %d start with spaces (%.3f%%), %d with comments (%.3f%%)\n",
		spaceLineCount, (spaceLineCount * 100.0) / lineCount,
		commentLineCount, (commentLineCount * 100.0) / lineCount);

	if ((spaceLineCount * 100) / lineCount > 40)
		return true;
	if (((spaceLineCount + commentLineCount) * 100) / lineCount > 50)
		return true;
	return false;
}


/*
 * Re-tab a Merlin assembly file.
 *
 * We try to track quoted material on the operand field to avoid tabbing
 * parts of quoted text around.  This isn't strictly necessary for a well-
 * formed Merlin file, which uses 0x20 as a "non-breaking space", but if it
 * has been "washed" through a converter or if this is actually a DOS ED/ASM
 * file, tracking quotes is almost always beneficial.
 */
int
ReformatMerlin::Process(const ReformatHolder* pHolder,
	ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
	ReformatOutput* pOutput)
{
	const unsigned char* srcPtr = pHolder->GetSourceBuf(part);
	long srcLen = pHolder->GetSourceLen(part);
	long length = srcLen;
	int retval = -1;
	enum { kStateLabel, kStateMnemonic, kStateOperand, kStateComment };
	int tabStop[] = { 0, 9, 15, 26 };	// 1:1 map with state enum
	int state;
	unsigned char quoteChar = '\0';

	fUseRTF = false;

	RTFBegin();

	bool isLineStart = true;
	for ( ; srcLen > 0; srcLen--, srcPtr++) {
		if (isLineStart) {
			isLineStart = false;
			OutputStart();		// begin new line in output buffer
			state = kStateLabel;
			if (*srcPtr == 0xaa)
				state = kStateComment;
		}
		if (*srcPtr == 0x8d) {
			OutputFinish();		// end of line

			BufPrintf("%s", GetOutBuf());
			RTFNewPara();

			isLineStart = true;
			if (quoteChar != '\0') {
				DebugBreak();
				quoteChar = '\0';
			}
			continue;
		}

		if (state >= kStateComment) {
			Output(*srcPtr & 0x7f);
		} else if (quoteChar != '\0') {
			if (*srcPtr == quoteChar) {
				/* close quote */
				quoteChar = '\0';
			}
			Output(*srcPtr & 0x7f);
		} else if (state == kStateOperand &&
				   (*srcPtr == '\'' + 0x80 || *srcPtr == '"' + 0x80))
		{
			/* open quote */
			quoteChar = *srcPtr;
			Output(quoteChar & 0x7f);
		} else if (*srcPtr == 0xa0) {		// high-ASCII space
			// does not trigger on 0x20; this matches behavior of
			// Merlin-16 v3.40
			state++;
			OutputTab(tabStop[state]);
		} else if (*srcPtr == 0xbb) {		// high-ASCII ';'
			// just comment, or comment on mnemonic w/o operand
			// (shouldn't tab out if line started with label but
			// contains 0x20s instead of 0xa0s between components;
			// oh well.)
			state = kStateComment;
			OutputTab(tabStop[state]);
			Output(*srcPtr & 0x7f);
		} else {
			Output(*srcPtr & 0x7f);
		}
	}

//done:
	RTFEnd();

	SetResultBuffer(pOutput);
	retval = 0;

//bail:
	return retval;
}


/*
 * ===========================================================================
 *		LISA Assembler - v2.x
 * ===========================================================================
 */

/*
 * This is for LISA v2.5 and earlier, which ran under DOS 3.3.  It used a
 * fairly simple format with tokenized mnemonics.
 *
 * The conversion was created by examination of the source files.  The table
 * of mnemonics was extracted from the assembler binary. 
 */

/*
 * Decide whether or not we want to handle this file.
 */
void
ReformatLISA2::Examine(ReformatHolder* pHolder)
{
	if (pHolder->GetSourceFormat() == ReformatHolder::kSourceFormatDOS &&
		pHolder->GetFileType() == kTypeDOS_B)
	{
		if (ReformatLISA2::IsLISA(pHolder)) {
			/* definitely LISA */
			pHolder->SetApplic(ReformatHolder::kReformatLISA2,
				ReformatHolder::kApplicYes,
				ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
		} else {
			/* maybe LISA */
			pHolder->SetApplic(ReformatHolder::kReformatLISA2,
				ReformatHolder::kApplicMaybe,
				ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
		}
	} else {
		/* not LISA */
		pHolder->SetApplic(ReformatHolder::kReformatLISA2,
			ReformatHolder::kApplicNot,
			ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
	}
}

/*
 * Quick sanity check on the file contents.
 */
bool
ReformatLISA2::IsLISA(const ReformatHolder* pHolder)
{
	const unsigned char* srcPtr = pHolder->GetSourceBuf(ReformatHolder::kPartData);
	long srcLen = pHolder->GetSourceLen(ReformatHolder::kPartData);
	unsigned short version, len;

	if (srcLen < 8)
		return false;

	version = Read16(&srcPtr, &srcLen);
	len = Read16(&srcPtr, &srcLen);

	if (len > srcLen)
		return false;

	return true;
}


/*
 * Opcode mnemonics.
 */
static const char gOpcodes[] = 
    "BGEBLTBMIBCCBCSBPLBNEBEQ"      // 80-87
    "BVSBVCBSBBNMBM1BNZBIZBIM"      // 88-8f
    "BIPBICBNCBRABTRBFLBRKBKS"      // 90-97
    "CLVCLCCLDCLIDEXDEYINXINY"      // 98-9f
    "NOPPHAPLAPHPPLPRTSRTIRSB"      // a0-a7
    "RTNSECSEISEDTAXTAYTSXTXA"      // a8-af
    "TXSTYAADDCPRDCRINRSUBLDD"      // b0-b7
    "POPPPDSTDSTPLDRSTOSET___"      // b8-bf
    "ADCANDORABITCMPCPXCPYDEC"      // c0-c7
    "EORINCJMPJSR___LDALDXLDY"      // c8-cf
    "STASTXSTYXORLSRRORROLASL"      // d0-d7
    "ADREQUORGOBJEPZSTRDCMASC"      // d8-df
    "ICLENDLSTNLSHEXBYTHBYPAU"      // e0-e7
    "DFSDCI...PAGINVBLKDBYTTL"      // e8-ef
    "SBC___LET.IF.EL.FI=  PHS"      // f0-f7
    "DPH.DAGENNOGUSR_________"      // f8-ff
    ;


/*
 * Format:
 *  2-byte version (?)
 *	2-byte length
 *  <LINE> ...
 *
 * Each line is:
 *  1-byte length
 *  <DATA>
 *  CR
 *
 * Last line has length=255.
 */

/*
 * Parse a file.
 */
int
ReformatLISA2::Process(const ReformatHolder* pHolder,
	ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
	ReformatOutput* pOutput)
{
	const unsigned char* srcPtr = pHolder->GetSourceBuf(part);
	long srcLen = pHolder->GetSourceLen(part);
    long actualLen;
	int retval = -1;

	fUseRTF = false;

    if (srcLen < 8) {
		WMSG0("  LISA truncated?\n");
        goto bail;
    }

	unsigned short version;

	version = Read16(&srcPtr, &srcLen);		// usually 0x1800; maybe "2.4"?
	actualLen = Read16(&srcPtr, &srcLen);

	WMSG2("  LISA version 0x%04x, len=%d\n", version, actualLen);

    if (actualLen > srcLen) {
        WMSG2("  LISA bad length (len=%ld actual=%ld)\n", srcLen, actualLen);
        goto bail;
    }

    int lineNum;
    lineNum = 0;
    while (actualLen > 0) {
        int lineLen = *srcPtr;
        if (lineLen == 0) {
            WMSG1("  LISA bad line len (%ld)\n", lineLen);
            break;
        } else if (lineLen == 255) {
            // used as end-of-file marker
            break;
        }

        lineNum++;

		OutputStart();
        ProcessLine(srcPtr);
        OutputFinish();

        //BufPrintf("%4d %s\r\n", lineNum, GetOutBuf());
        BufPrintf("%s\r\n", GetOutBuf());

        srcPtr += lineLen+1;
        actualLen -= lineLen+1;
    }

	SetResultBuffer(pOutput);
	retval = 0;

bail:
    return retval;
}

void
ReformatLISA2::ProcessLine(const unsigned char* buf)
{
    int len = *buf;
    unsigned char uch;

	// consume length byte
    buf++;
    len--;

    if (*buf >= 0x80) {
        // starting the opcode, tab past label field
        OutputTab(kOpTab);
    } else if (*buf != ';' && len > 8) {
        // starting with 8-character label
		bool doPrint = true;
        for (int i = 0; i < 8; i++) {
            uch = *buf;
            if (uch < 0x20 || uch >= 0x80) {
                WMSG1("  LISA funky char 0x%02x in label\n", uch);
                break;
            } else if (uch == 0x20) {
				doPrint = false;
			}
			if (doPrint)
				Output(uch);
            buf++;
            len--;
        }
		if (len > 0 && *buf == ':') {
			Output(*buf);
			buf++;
			len--;
		}
        OutputTab(kOpTab);
    }

    bool mnemonicDone = false;
    bool operandDone = false;
    while (len--) {
        uch = *buf++;

        if (uch >= 0x20 && uch < 0x80) {
			if (mnemonicDone && uch != 0x20)
				operandDone = true;
			if (mnemonicDone && !operandDone && uch == 0x20) {
				// suppress extra spaces between mnemonic and operand
			} else
				Output(uch);
        } else if (uch < 0x20) {
			// Values from 0x01 - 0x05 are used to separate the opcode from
			// the operand, and seem to "hint" the operand type (immediate,
			// absolute, etc).  Just ignore for now.
        } else if (uch == 0x0d) {
            // don't output CR to line buf
            if (len) {
                WMSG0("WARNING: got early CR\n");
			}
        } else if (mnemonicDone) {
			// Values >= 0x80 are mnemonics, but we've already seen it.
            // LISA seems to use 0xbb to separate operand and comment field
            // (would be "STP" mnemonic).  I don't see other uses, so I'm
			// just going to tab over instead of outputing a second
			// mnemonic value.
			if (len > 1) {
				OutputTab(kComTab);
				Output(';');
			}
        } else {
            const char* mnemonic;

            mnemonic = &gOpcodes[(uch - 128) * 3];
            Output(mnemonic[0]);
            Output(mnemonic[1]);
            Output(mnemonic[2]);
            OutputTab(kAdTab);
            mnemonicDone = true;
        }
    }
}


/*
 * ===========================================================================
 *		LISA Assembler - v4 and v5
 * ===========================================================================
 */

/*
 * The ProDOS version of LISA uses the INT filetype with the assembler
 * version number in the aux type.  The version is always < $4000.
 *
 * The file format looks like this:
 *	4-byte header
 *	symbol dictionary, 8 bytes per symbol
 *	<line> ...
 *
 * The way the lines are decoded is fairly involved.  The code here was
 * developed from the LISA v3.2a sources, as found on the A2ROMulan CD-ROM.
 */

/*
 * Opcode mnemonics.
 */
static const char gMnemonics3[256*3 +1] =
    // 0x00 (SN, M65.2) - Group 1 instructions
    "addadcandcmpeorldaorasbc"
    "stasubxor"
    // 0x0b - Group 2 instructions
             "asldecinclsrrol"
    "ror"
    // 0x11 - Group 3 instructions
       ".ifwhlbrabccbcsbeqbfl"
    "bgebltbmibnebplbtrbvcbvs"
    "jsrobjorgphs"
    // 0x24 - Group 4 instructions
                ".mdfzrinplcl"
    "rls"
    // 0x29 - Group 5 instructions
       "bitcpxcpyjmpldxldystx"
    "stytrbtsbstz"
    // 0x34 - Group 6 instructions
                "=  conepzequ"
    "set"
    // 0x39 - Group 7 instructions
       ".daadrbytcspdbyhby"
    // 0x3f - Group 8 instructions
                         "anx"
    "sbtttlchnblkdciinvrvsmsg"
    "strzro"
    // 0x4a - Group 9 instructions
          "dfshexusrsav"
    //M65LEN2  equ      * - M65.2
                      "??????"      // 0x4e-0x4f
    "????????????????????????"      // 0x50-0x57
    "????????????????????????"      // 0x58-0x5f
    "????????????????????????"      // 0x60-0x67
    "????????????????????????"      // 0x68-0x6f
    "????????????????????????"      // 0x70-0x77
    "????????????????????????"      // 0x78-0x7f
    "????????????????????????"      // 0x80-0x87
    "????????????????????????"      // 0x88-0x8f
    "????????????????????????"      // 0x90-0x97
    "????????????????????????"      // 0x98-0x9f
    "????????????????????????"      // 0xa0-0xa7
    "????????????????????????"      // 0xa8-0xaf

    // 0xb0 (SS M65.1) - assembler directives
    ".el.fi.me.wedphif1if2end"
    "expgenlstnlsnognoxpagpau"
    "nlccnd   "
    // 0xc2 - Single-byte instructions
             "asllsrrolrordec"
    "incbrkclccldcliclvdexdey"
    "inxinynopphaphpplaplprti"
    "rtssecsedseitaxtaytsxtxa"
    "txstyaphxphyplxply"
    //M65LEN1  equ      * - M65.1

                      "??????"      // 0xe6-0xe7
    "????????????????????????"      // 0xe8-0xef
    "????????????????????????"      // 0xf0-0xff
    "????????????????????????"      // 0xf8-0xff
    ;

/*
 * Determine whether this is one of our files.
 */
void
ReformatLISA3::Examine(ReformatHolder* pHolder)
{
	/*
	 * Note we cannot false-positive on an INT file on a DOS disk, because
	 * in DOS 3.3 INT files always have zero aux type.
	 */
	if (pHolder->GetFileType() == kTypeINT &&
		pHolder->GetAuxType() < 0x4000)
	{
		if (ReformatLISA3::IsLISA(pHolder)) {
			/* definitely LISA */
			pHolder->SetApplic(ReformatHolder::kReformatLISA3,
				ReformatHolder::kApplicYes,
				ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
		} else {
			/* possibly LISA */
			pHolder->SetApplic(ReformatHolder::kReformatLISA3,
				ReformatHolder::kApplicMaybe,
				ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
		}
	} else {
		/* not LISA */
		pHolder->SetApplic(ReformatHolder::kReformatLISA3,
			ReformatHolder::kApplicNot,
			ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
	}
}

/*
 * Decide if this is one of ours or perhaps an Integer BASIC or S-C
 * assembler source.
 */
/*static*/ bool
ReformatLISA3::IsLISA(const ReformatHolder* pHolder)
{
	bool dosStructure = (pHolder->GetSourceFormat() == ReformatHolder::kSourceFormatDOS);
	const unsigned char* srcPtr = pHolder->GetSourceBuf(ReformatHolder::kPartData);
	long srcLen = pHolder->GetSourceLen(ReformatHolder::kPartData);

	if (pHolder->GetSourceFormat() == ReformatHolder::kSourceFormatDOS)
		return false;		// can only live under ProDOS; need len + aux type

	if (srcLen < kHeaderLen+2)
		return false;		// too short

    unsigned short codeLen, symLen;

    codeLen = srcPtr[0x00] | srcPtr[0x01] << 8;
    symLen = srcPtr[0x02] | srcPtr[0x03] << 8;

    if ((symLen & 0x0003) != 0 || symLen > 512*8 || symLen > srcLen) {
        WMSG0("  LISA3 bad symLen\n");
        return false;
    }
    if (codeLen > srcLen) {
        WMSG0("  LISA3 funky codeLen\n");
        return false;
    }
    if (codeLen + symLen + kHeaderLen > srcLen) {
        WMSG0("  LISA3 bad combined len\n");
        return false;
    }

	return true;
}

/*
 * Parse a file.
 */
int
ReformatLISA3::Process(const ReformatHolder* pHolder,
	ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
	ReformatOutput* pOutput)
{
	const unsigned char* srcPtr = pHolder->GetSourceBuf(part);
	long srcLen = pHolder->GetSourceLen(part);
	int retval = -1;

    if (srcLen < kHeaderLen+2) {
        WMSG0("  LISA3 too short\n");
        goto bail;
    }

	fUseRTF = false;

    unsigned short codeLen, symLen;

    codeLen = srcPtr[0x00] | srcPtr[0x01] << 8;
    symLen = srcPtr[0x02] | srcPtr[0x03] << 8;

    printf("codeLen=%d, symLen=%d\n", codeLen, symLen);

    if ((symLen & 0x0003) != 0 || symLen > 512*8 || symLen > srcLen) {
        WMSG0("  LISA3 bad symLen\n");
        goto bail;
    }
    if (codeLen > srcLen) {
        WMSG0("  LISA3 funky codeLen\n");
        goto bail;
    }
    if (codeLen + symLen + kHeaderLen > srcLen) {
        WMSG0("  LISA3 bad combined len\n");
        goto bail;
    }

    fSymCount = symLen / 8;
    fSymTab = srcPtr + kHeaderLen;
#if 0
	int ii;
    for (ii = 0; ii < fSymCount; ii++) {
        OutputStart();
        PrintSymEntry(ii);
        OutputFinish();
        WMSG2("%d: %s\n", ii, GetOutBuf());
    }
#endif

    /*
     * Do stuff with source lines.
     */
    const unsigned char* codePtr;
    const unsigned char* endPtr;
    int lineNum;

    codePtr = srcPtr + kHeaderLen + symLen;
    endPtr = srcPtr + srcLen;
    assert(codePtr < endPtr);
    lineNum = 0;

    while (codePtr < endPtr) {
        unsigned char flagByte;
        int lineLen;

        OutputStart();
        lineNum++;

#if 0
        {
            char offbuf[12];
            sprintf(offbuf, "0x%04x", codePtr - srcPtr);
            Output(offbuf);
            Output('-');
        }
#endif

        flagByte = *codePtr++;
        if (flagByte < 0x80) {
            /* BIGONE - explicit length, complex line */
            lineLen = flagByte;
            /* subtract 1 from flagByte, because len includes flagByte */
            if (flagByte > 0)
                ProcessLine(codePtr, flagByte-1);
        } else {
            /* SPCLCASE - locals, labels, comments */
            if (flagByte >= kLCLTKN) {
                lineLen = 1;
                if (flagByte == kCMNTTKN+1) {
                    Output(';');
                } else if (flagByte == kCMNTTKN) {
                    Output('*');
                } else if (flagByte < kLBLTKN) {
                    /* CNVRTLCL: 0xf0 - 0xf9 - local numeric labels */
                    Output('^');
                    Output('0' + flagByte - 0xf0);
                    Output(':');
                } else if (flagByte < kMACTKN) {
                    /* normal label; 0xfb means add 256 */
                    int idx;
                    idx = *codePtr | (flagByte & 0x01) << 8;
                    PrintSymEntry(idx);
                    Output(':');
                    lineLen = 2;
                } else {
                    /* macro (only object on line) */
                    assert(flagByte == kMACTKN || flagByte == kMACTKN+1);
                    OutputTab(kOpTab);
                    int idx;
                    idx = *codePtr | (flagByte & 0x01) << 8;
                    Output('/');        // MACROCHR
                    PrintSymEntry(idx);
                    lineLen = 2;
                }
            } else {
                /* SHRTMNM2 - simple, standard mnemonic */
                lineLen = 1;
                OutputTab(kOpTab);
                PrintMnemonic(flagByte);
            }
        }
        

        if (lineLen == 0) {
            /* end of file */
            break;
        }

        OutputFinish();
        //BufPrintf("%d: %s\r\n", lineNum, outBuf);
        BufPrintf("%s\r\n", GetOutBuf());

        codePtr += lineLen-1;
    }

    WMSG3("codePtr=%p endPtr=%p numLines=%d\n", codePtr, endPtr, lineNum-1);
    WMSG1("extra = %d\n", endPtr - codePtr);

	SetResultBuffer(pOutput);
	retval = 0;

bail:
	fSymTab = nil;
    return retval;
}


/*
 * BIGONE
 */
void
ReformatLISA3::ProcessLine(const unsigned char* codePtr, int len)
{
    unsigned char mnemonic = 0;

    //printf("{code=0x%02x len=%d}", *codePtr, len);
    if (*codePtr == kCMNTTKN+1 || *codePtr == kCMNTTKN) {
        switch (*codePtr) {
        case kCMNTTKN+1:    Output(';');    break;
        case kCMNTTKN:      Output('*');    break;
        default:
            assert(false);
        }
        // CNVCMNT
        codePtr++;
        while (--len)
            Output(*codePtr++ & 0x7f);

        goto bail;
    } else if (*codePtr == kMACTKN || *codePtr == kMACTKN+1) {
        /* CHKMACRO - handle macro */
        unsigned short idx;
        mnemonic = *codePtr;
        idx = (*codePtr & 0x01) << 8;
        idx |= *++codePtr;
        OutputTab(kOpTab);
        Output('/');        // MACROCHR
        PrintSymEntry(idx);
        codePtr++;
        len -= 2;
        goto ConvtOperand;
    } else if (*codePtr == kLBLTKN || *codePtr == kLBLTKN+1) {
        /* CHKCLBL - handle label at start of line */
        unsigned short idx;
        idx = (*codePtr & 0x01) << 8;
        idx |= *++codePtr;
        PrintSymEntry(idx);
        codePtr++;
        len -= 2;
        // goto ConvtMnem
    } else if (*codePtr >= kLCLTKN) {
        /* CHKLLBL - handle local label (^) */
        Output('^');
        Output((char) (*codePtr - 0xc0));
        codePtr++;
        len--;
        // goto CNVTMNEM
    } else {
        /* no label; current value is the mnemonic; continue w/o advancing */
        // fall through to CNVTMNEM
    }

    /* CNVTMNEM */
    mnemonic = *codePtr++;
    len--;
    //printf("{mne=0x%02x}", mnemonic);
    if (mnemonic >= kMACTKN) {
        /* CNVRTMAC */
        assert(mnemonic == kMACTKN || mnemonic == kMACTKN+1);
        OutputTab(kOpTab);
        int idx;
        idx = *codePtr++;
        idx |= (mnemonic & 0x01) << 8;
        Output('/');        // MACROCHR
        PrintSymEntry(idx);
        len--;
        //printf("{MAC:%d}", len);
    } else {
        OutputTab(kOpTab);
        PrintMnemonic(mnemonic);
    }

ConvtOperand:
    /* ConvtOperand */
    //printf("{cen=%d}", len);
    ConvertOperand(mnemonic, &codePtr, &len);

bail:
    //if (len > 0)
    //    WMSG1("{LEN=%d}", len);

    return;
}


/*
 * CNVOPRND
 */
void
ReformatLISA3::ConvertOperand(unsigned char mnemonic,
	const unsigned char** pCodePtr, int* pLen)
{
    static const char kOPRTRST1[] = "+-*/&|^=<>%<><";
    static const char kOPRTRST2[] = "\0\0\0\0\0\0\0\0\0\0\0==>";

    const unsigned char* codePtr = *pCodePtr;
    int len = *pLen;
    OperandResult result;
    unsigned char adrsMode = 0;
    unsigned char val;

    //printf("{opr len=%d}", len);

    if (mnemonic >= kCMNTTKN) {
        /* OUTCMNT2 */
        PrintComment(adrsMode, codePtr, len);
        goto bail;
    }
    if (mnemonic < kSS) {
        if (mnemonic < kGROUP3 || (mnemonic >= kGROUP5 && mnemonic < kGROUP6)) {
            // address mode is explicit
            adrsMode = *codePtr++;
            len--;
            //printf("{adrs=0x%02x}", adrsMode);
        }
    }
    OutputTab(kAdTab);
    if (adrsMode >= 0x10 && adrsMode < 0x80)
        Output('(');

    /* OUTOPRND */
    while (len > 0) {
        val = *codePtr++;
        len--;

        if (val == 0x0e) {
            Output('~');
            continue;       // goto OutOprnd
        } else if (val == 0x0f) {
            Output('-');
            continue;       // goto OutOprnd
        } else if (val == 0x3a) {
            Output('#');
            continue;       // goto OutOprnd
        } else if (val == 0x3b) {
            Output('/');
            continue;       // goto OutOprnd
        } else if (val == 0x3d) {
            Output('@');
            continue;       // goto OutOprnd
        } else {
            result = PrintNum(adrsMode, val, &codePtr, &len);
            if (result == kResultGotoOutOprnd)
                continue;   //goto OutOprnd;
            else if (result == kResultFailed)
                goto bail;
            // else goto OutOprtr
        }

OutOprtr:
        unsigned char opr;

        if (!len)
            break;
        opr = *codePtr++;
        len--;

        if (opr < 0x0e) {
            Output(' ');
            Output(kOPRTRST1[opr]);
            if (kOPRTRST2[opr] != '\0')
                Output(kOPRTRST2[opr]);
            Output(' ');
            // goto OutOprnd
        } else if (opr < 0x20 || opr >= 0x30) {
            // NOOPRTR
            if (opr == kCMNTTKN+1) {
                PrintComment(adrsMode, codePtr, len);
                codePtr += len;
                len = 0;
                goto bail;
            }
            Output(',');
            codePtr--;      // back up
            len++;
            // goto OutOprnd
        } else {
            Output('+');
            result = PrintNum(adrsMode, opr - 0x10, &codePtr, &len);
            if (result == kResultGotoOutOprnd)
                continue;
            else if (result == kResultGotoOutOprtr)
                goto OutOprtr;
            else
                goto bail;
        }
    }
    PrintComment(adrsMode, codePtr, len);

bail:
    *pCodePtr = codePtr;
    *pLen = len;
}

/*
 * Output a single byte as a binary string.
 */
void
ReformatLISA3::PrintBin(unsigned char val)
{
    char buf[9];
    buf[8] = '\0';

    for (int bit = 0; bit < 8; bit++)
        buf[bit] = '0' + ((val >> (7-bit)) & 0x01);
    Output(buf);
}

/*
 * OUTNUM
 */
ReformatLISA3::OperandResult
ReformatLISA3::PrintNum(int adrsMode, unsigned char val,
    const unsigned char** pCodePtr, int* pLen)
{
    const unsigned char* codePtr = *pCodePtr;
    int len = *pLen;
    OperandResult result = kResultUnknown;
    char numBuf[12];

    // OUTNUM - these all jump to OutOprtr unless otherwise specified
    if (val < 0x1a) {
        Output(val | '0');
    } else if (val == 0x1a) {
        // 1-byte decimal
        sprintf(numBuf, "%u", *codePtr++);
        Output(numBuf);
        len--;
    } else if (val == 0x1b) {
        // 2-byte decimal
        unsigned short num;
        num = *codePtr++;
        num |= *codePtr++ << 8;
        len -= 2;
        sprintf(numBuf, "%u", num);
        Output(numBuf);
    } else if (val == 0x1c) {
        // 1-byte hex
        Output('$');
        sprintf(numBuf, "%02X", *codePtr++);
        Output(numBuf);
        len--;
    } else if (val == 0x1d) {
        // 2-byte hex
        Output('$');
        unsigned short num;
        num = *codePtr++;
        num |= *codePtr++ << 8;
        sprintf(numBuf, "%04X", num);
        Output(numBuf);
        len -= 2;
    } else if (val == 0x1e) {
        Output('%');
        PrintBin(*codePtr++);
        len--;
    } else if (val == 0x1f) {
        Output('%');
        PrintBin(*codePtr++);
        PrintBin(*codePtr++);
        len -= 2;
    } else if (val >= 0x36 && val <= 0x39) {
        // OUTIMD
        if (val == 0x36 || val == 0x37)
            Output('#');
        else
            Output('/');
        int idx;
        idx = (val & 0x01) << 8;
        idx |= *codePtr++;
        PrintSymEntry(idx);
        len--;
    } else if (val == 0x3c) {
        Output('*');        // loc cntr token
    } else if (val < 0x4a) {
        // <0..<9 tokens
        Output('<');
        Output(val - 0x10);
    } else if (val < 0x50) {
        // ?0..?5 tokens (+0x66)
        Output('?');
        Output(val - 0x1a);
    } else if (val < 0x5a) {
        Output('>');
        Output(val - 0x20);
    } else if (val < 0x60) {
        // ?6..?9 tokens (+0x5c)
        unsigned char newVal = val - 0x24;
        Output('?');
        if (newVal == ';')
            Output('#');
        else
            Output(newVal);
        if (newVal == ':')
            result = kResultGotoOutOprnd;
    } else if (val < 0x80) {
        // String tokens
        int strLen = val & 0x1f;
        if (strLen == 0) {
            // explict length
            strLen = *codePtr++;
            len--;
        }
        if (strLen > len) {
            Output("!BAD STR!");
			DebugBreak();
            result = kResultFailed;
			goto bail;
        }
        char delim;
        if (*codePtr >= 0x80)
            delim = '"';
        else
            delim = '\'';
        Output(delim);
        while (strLen--) {
            if ((*codePtr & 0x7f) == delim)
                Output(delim);
            Output(*codePtr++ & 0x7f);
            len--;
        }
        Output(delim);
    } else if (val == kLBLTKN || val == kLBLTKN+1) {
        int idx;
        idx = (val & 0x01) << 8;
        idx |= *codePtr++;
        len--;
        PrintSymEntry(idx);
    } else if (val == kCMNTTKN+1) {
        /* OUTCMNT2 */
        PrintComment(adrsMode, codePtr, len);
        codePtr += len;
        len = 0;
    } else {
        // just go to OutOprtr
    }

    if (result == kResultUnknown)
        result = kResultGotoOutOprtr;

bail:
    *pCodePtr = codePtr;
    *pLen = len;
    return result;
}

/*
 * Print symbol table entry.  Each entry is an 8-byte label packed into
 * 6 bytes.
 */
void
ReformatLISA3::PrintSymEntry(int ent)
{
    if (ent < 0 || ent >= fSymCount) {
        Output("!BAD SYM!");
        WMSG2("invalid entry %d (max %d)\n", ent, fSymCount);
		DebugBreak();
        return;
    }

    const unsigned char* packed = &fSymTab[ent * 8];
    unsigned char tmp[8];
    int i;

    tmp[0] = packed[0] >> 2;
    tmp[1] = ((packed[0] << 4) & 0x3c) | packed[1] >> 4;
    tmp[2] = ((packed[1] << 2) & 0x3c) | packed[2] >> 6;
    tmp[3] = packed[2] & 0x3f;

    tmp[4] = packed[3] >> 2;
    tmp[5] = ((packed[3] << 4) & 0x3c) | packed[4] >> 4;
    tmp[6] = ((packed[4] << 2) & 0x3c) | packed[5] >> 6;
    tmp[7] = packed[5] & 0x3f;

    for (i = 0; i < 8; i++) {
        if (tmp[i] == 0x20)
            break;
        else if (tmp[i] >= 0x20)
            Output(tmp[i]);
        else
            Output(tmp[i] | 0x40);
    }
}

void
ReformatLISA3::PrintMnemonic(unsigned char val)
{
    const char* ptr = &gMnemonics3[val * 3];
    Output(ptr[0]);
    Output(ptr[1]);
    Output(ptr[2]);
}

/*
 * OUTCMNT2
 *
 * Prints the comment.  Finishes off the operand if necessary.
 */
void
ReformatLISA3::PrintComment(int adrsMode, const unsigned char* codePtr, int len)
{
    assert(len >= 0);

    if (adrsMode == 0x04)
        Output(",X");
    else if (adrsMode == 0x08)
        Output(",Y");
    else if (adrsMode == 0x10)
        Output(')');
    else if (adrsMode == 0x20)
        Output(",X)");
    else if (adrsMode == 0x40)
        Output("),Y");

    if (len > 0) {
        OutputTab(kComTab);
        Output(';');
        while (len--)
            Output(*codePtr++ & 0x7f);
    }
}


/*
 * ===========================================================================
 *		LISA Assembler - v4 and v5
 * ===========================================================================
 */

/*
 * The ProDOS / GS/OS version of LISA uses the INT filetype with the
 * assembler version number in the aux type.  The version is always > $4000.
 *
 * The file format looks like this:
 *	16-byte header
 *	symbol dictionary
 *	<line> ...
 *
 * The way the lines are decoded is fairly involved.  The code here was
 * developed from the LISA/816 v5.0a (433) sources, as found on
 * the A2ROMulan CD-ROM.
 */

/*
 * Determine whether this is one of our files.
 */
void
ReformatLISA4::Examine(ReformatHolder* pHolder)
{
	/*
	 * Note we cannot false-positive on an INT file on a DOS disk, because
	 * in DOS 3.3 INT files always have zero aux type.
	 */
	if (pHolder->GetFileType() == kTypeINT &&
		pHolder->GetAuxType() >= 0x4000)
	{
		if (ReformatLISA4::IsLISA(pHolder)) {
			/* definitely LISA */
			pHolder->SetApplic(ReformatHolder::kReformatLISA4,
				ReformatHolder::kApplicYes,
				ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
		} else {
			/* possibly LISA */
			pHolder->SetApplic(ReformatHolder::kReformatLISA4,
				ReformatHolder::kApplicMaybe,
				ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
		}
	} else {
		/* not LISA */
		pHolder->SetApplic(ReformatHolder::kReformatLISA4,
			ReformatHolder::kApplicNot,
			ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
	}
}

/*
 * Decide if this is one of ours or perhaps an Integer BASIC or S-C
 * assembler source.
 */
/*static*/ bool
ReformatLISA4::IsLISA(const ReformatHolder* pHolder)
{
	bool dosStructure = (pHolder->GetSourceFormat() == ReformatHolder::kSourceFormatDOS);
	const unsigned char* srcPtr = pHolder->GetSourceBuf(ReformatHolder::kPartData);
	long srcLen = pHolder->GetSourceLen(ReformatHolder::kPartData);

	if (pHolder->GetSourceFormat() == ReformatHolder::kSourceFormatDOS)
		return false;		// can only live under ProDOS; need len + aux type

	if (srcLen < kHeaderLen+2)
		return false;		// too short

    unsigned short version;
    unsigned short symEnd;
    unsigned short symCount;

    version = srcPtr[0x00] | srcPtr[0x01] << 8;
    symEnd = srcPtr[0x02] | srcPtr[0x03] << 8;
    symCount = srcPtr[0x04] | srcPtr[0x05] << 8;

    if (symEnd > srcLen) {
        WMSG0("  LISA4 bad symEnd\n");
        return false;
    }
    if (symCount > symEnd) {
        WMSG2("  LISA4 funky symCount (count=%d end=%d)\n",
			symCount, symEnd);
        return false;;
    }

	unsigned char opTab, adTab, comTab;
    opTab = srcPtr[0x06];
    adTab = srcPtr[0x07];
    comTab = srcPtr[0x08];

	if (opTab < 1 || adTab < 2 || comTab < 3) {
		WMSG0("  LISA4 missing tabs\n");
		return false;
	}
	if (opTab >= 128 || adTab >= 128 || comTab >= 128) {
		WMSG0("  LISA4 huge tabs\n");
		return false;
	}

	return true;
}

static const char* gHexDigit = "0123456789ABCDEF";


/*
 * Table of mnemonics, from v5.0a editor sources.
 *
 * Some entries were not present in the editor sources, but were used
 * by sample source code, and have been added here:
 *	0x6c .assume
 *	0x7f .table
 */
static const char* gMnemonics4[] = {
    // 00 - 0f
    "???", "add", "adc", "and", "cmp", "eor", "lda", "ora",
    "sbc", "sta", "sub", "xor", "asl", "dec", "inc", "lsr",
    // 10 - 1f
    "rol", "ror", ".if", "whl", ".go", "bra", "bcc", "bcs",
    "beq", "bfl", "bge", "blt", "bmi", "bne", "bpl", "btr",
    // 20 - 2f
    "bvc", "bvs", "obj", "org", "phs", ".db", "pea", "per",
    "brl", ".md", "far", "fdr", "fzr", "inp", "lcl", "rls",
    // 30 - 3f
    "bit", "cpx", "cpy", "ldx", "ldy", "stx", "sty", "trb",
    "tsb", "stz", "pei", "rep", "sep", "jmp", "jsr", "jml",
    // 40 - 4f
    "jsl", "mvn", "mvp", "= ", "con", "epd", "epz", "eql",
    "equ", "set", ".da", "adr", "byt", "csp", "dby", "hby",
    // 50 - 5f
    "bby", "anx", "chn", "icl", "lib", "lnk", "msg", "psm",
    "rlb", "sbt", "ttl", "dci", "rvs", "str", "zro", "dfs",
    // 60 - 6f
    "hex", "usr", "sav", ".tf", "seg", "cpu", ".entry", ".ref",
    ".group", ".deref", "long", NULL, ".assume", NULL, NULL, NULL,
    // 70 - 7f
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, ".table",
    // 80 - 8f
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    // 90 - 9f
    ".el", ".fi", ".me", ".we", ".la", ".lx", ".sa", ".sx",
    "dph", "if1", "if2", "end", "exp", "gen", "lst", "nls",
    // a0 - af
    "nog", "nox", "pag", "pau", "nlc", "cnd", "asl", "lsr",
    "rol", "ror", "dec", "inc", "mvn", "mvp", "brk", "clc",
    // b0 - bf
    "cld", "cli", "clv", "dex", "dey", "inx", "iny", "nop",
    "pha", "php", "pla", "plp", "rti", "rts", "sec", "sed",
    // c0 - cf
    "sei", "tax", "tay", "tsx", "txa", "txs", "tya", "phx",
    "phy", "plx", "ply", "cop", "phb", "phd", "phk", "plb",
    // d0 - df
    "pld", "rtl", "stp", "swa", "tad", "tas", "tcd", "tcs",
    "tda", "tdc", "tsa", "tsc", "txy", "tyx", "wai", "xba",
    // e0 - ef
    "xce", ".proc", ".endp", ".table", ".endt", NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    // f0 - ff
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
};


/*
 * Parse a file.
 */
int
ReformatLISA4::Process(const ReformatHolder* pHolder,
	ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
	ReformatOutput* pOutput)
{
	const unsigned char* srcPtr = pHolder->GetSourceBuf(part);
	long srcLen = pHolder->GetSourceLen(part);
	int retval = -1;

    if (srcLen < kHeaderLen+2) {
        WMSG0("  LISA4 too short\n");
        goto bail;
    }

	fUseRTF = false;

    unsigned short version;
    unsigned short symEnd;

    version = srcPtr[0x00] | srcPtr[0x01] << 8;
    symEnd = srcPtr[0x02] | srcPtr[0x03] << 8;
    fSymCount = srcPtr[0x04] | srcPtr[0x05] << 8;
    fOpTab = srcPtr[0x06];
    fAdTab = srcPtr[0x07];
    fComTab = srcPtr[0x08];
    fCpuType = srcPtr[0x09];

    WMSG3("  LISA4 version = 0x%04x  symEnd=%d  symCount=%d\n",
        version, symEnd, fSymCount);
    WMSG4("  LISA4  opTab=%d adTab=%d comTab=%d cpuType=%d\n",
        fOpTab, fAdTab, fComTab, fCpuType);

    if (symEnd > srcLen) {
        WMSG0("  LISA4 bad symEnd\n");
        goto bail;
    }
    if (fSymCount > symEnd) {
        WMSG0("  LISA4 funky symCount\n");
        goto bail;
    }
	if (fSymCount > 0) {
		fSymTab = new const unsigned char*[fSymCount];
		if (fSymTab == nil)
			goto bail;
	}

    const unsigned char* symPtr;
    const unsigned char* endPtr;
    int symIdx;

    symPtr = srcPtr + kHeaderLen;
    endPtr = srcPtr + symEnd;
    if (symPtr > endPtr) {
        WMSG0("  LISA4 GLITCH: bad symEnd\n");
        goto bail;
    }

    /*
     * Generate symbol table index.
     */
    symIdx = 0;
    while (symPtr < endPtr) {
        if (symIdx < fSymCount)
            fSymTab[symIdx++] = symPtr;
        while (*symPtr != '\0')
            symPtr++;
        symPtr++;
    }
    if (symIdx != fSymCount) {
        WMSG2("  LISA4 err: symIdx is %d, symCount is %d\n", symIdx, fSymCount);
        goto bail;
    }

    WMSG3("  LISA4 symPtr=%p endPtr=%p symIdx=%d\n", symPtr, endPtr, symIdx);

    /*
     * Process source lines.
     */
    const unsigned char* codePtr;
    int lineNum;

    codePtr = srcPtr + symEnd;
    endPtr = srcPtr + srcLen;
    assert(codePtr < endPtr);
    lineNum = 0;

    while (codePtr < endPtr) {
        unsigned char flagByte;
        int lineLen;

        lineNum++;
		OutputStart();

        flagByte = *codePtr++;
        if (flagByte < 0x80) {
            /* explicit length, complex line */
            lineLen = flagByte;
            /* subtract 1 from flagByte, because len includes flagByte */
            if (flagByte > 0)
                ProcessLine(codePtr, flagByte-1);
        } else {
            /* SpecMnem - locals, labels, comments */
            if (flagByte >= kLocalTKN) {
                lineLen = 1;
                if (flagByte == kComntSemiTKN) {
                    Output(';');
                } else if (flagByte == kCommentTKN) {
                    Output('*');
                } else if (flagByte == kBlankTKN) {
                    // just a blank line
                } else if (flagByte < kLabelTKN) {
                    /* 0xf0 - 0xf9 - local numeric labels, e.g. "^1" */
                    Output('^');
                    Output('0' + flagByte - 0xf0);
                } else if (flagByte < kMacroTKN) {
                    /* 0xfa - 0xfb */
                    if (flagByte == 0xfa) {
                        /* label */
                        lineLen = 3;
                        int tmp = *codePtr | *(codePtr+1) << 8;
                        PrintSymEntry(tmp);
                    } else {
                        /* not used?? */
                        assert(lineLen == 1);
                        Output("??? ");
                    }
                } else {
                    /* macro (only object on line) */
                    assert(flagByte == kMacroTKN);
                    OutputTab(fOpTab);
                    int idx;
                    idx = *codePtr | *(codePtr+1) << 8;
                    Output('_');        // MacroChar
                    PrintSymEntry(idx);
                    lineLen = 3;
                }
            } else {
                /* OutMnem - simple, standard mnemonic */
                lineLen = 1;
                OutputTab(fOpTab);
				if (gMnemonics4[flagByte])
					Output(gMnemonics4[flagByte]);
				else
					Output("!BAD MNEMONIC!");
            }
        }
        

        if (lineLen == 0) {
            /* end of file */
            break;
        }

        OutputFinish();
        //BufPrintf("%d: %s\r\n", lineNum, GetOutBuf());
        BufPrintf("%s\r\n", GetOutBuf());

        codePtr += lineLen-1;
    }

    WMSG3("  LISA4 codePtr=%p endPtr=%p numLines=%d\n",
		codePtr, endPtr, lineNum-1);
    WMSG1("  LISA4 extra = %d\n", endPtr - codePtr);

	SetResultBuffer(pOutput);
	retval = 0;

bail:
    delete[] fSymTab;
	fSymTab = nil;
    return retval;
}

void
ReformatLISA4::ProcessLine(const unsigned char* codePtr, int len)
{
    unsigned char mnemonic = 0;

    if (*codePtr == kComntSemiTKN || *codePtr == kComntStarTKN ||
        *codePtr == kErrlnTKN)
    {
        switch (*codePtr) {
        case kComntSemiTKN:     Output(';');    break;
        case kComntStarTKN:     Output('*');    break;
        case kErrlnTKN:         Output('!');    break;
        default:
            assert(false);
        }
        codePtr++;
        while (--len)
            Output(*codePtr++ & 0x7f);

        goto bail;
    } else if (*codePtr == kMacroTKN) {
        /* handle macro */
        int idx;
        idx = *++codePtr;
        idx |= *++codePtr << 8;
        OutputTab(fOpTab);
        Output('_');        // MacroChar
        PrintSymEntry(idx);
        codePtr++;
        len -= 3;
        mnemonic = kMacroTKN;
        goto ConvtOperand;
    } else if (*codePtr == kLabelTKN) {
        /* handle label at start of line */
        unsigned short idx;
        idx = *++codePtr;
        idx |= *++codePtr << 8;
        PrintSymEntry(idx);
        codePtr++;
        len -= 3;
        // goto ConvtMnem
    } else if (*codePtr >= kLocalTKN) {
        /* handle local label (^) */
        Output('^');
        Output((char) (*codePtr - 0xc0));
        codePtr++;
        len--;
        // goto ConvtMnem
    } else {
        /* no label; current value is the mnemonic; continue w/o advancing */
        // fall through to ConvtMnem
    }

    /* ConvtMnem */
    mnemonic = *codePtr++;
    len--;
    if (mnemonic >= kMacroTKN) {
        /* OutMacro */
        assert(mnemonic == kMacroTKN);
        OutputTab(fOpTab);
        int idx;
        idx = *codePtr++;
        idx |= *codePtr++ << 8;
        Output('_');        // MacroChar
        PrintSymEntry(idx);
        len -= 2;
        //printf("{MAC:%d}", len);
    } else {
        OutputTab(fOpTab);
		if (gMnemonics4[mnemonic] != NULL)
			Output(gMnemonics4[mnemonic]);
		else {
			Output("!BAD MNEMONIC!");
			WMSG1("  LISA4 bad mnemonic 0x%02x\n", mnemonic);
			DebugBreak();
		}
        if (mnemonic >= kSS) {
            /* CnvMnem2 - mnemonic has no associated operand */
            /* need to fall into ConvertOperand to show comment */
            if (len > 0) {
                /* can only be comment here; skip comment token */
                if (*codePtr != kComntSemiTKN)
                    printf("{SKIP=0x%02x,len=%d}", *codePtr, len);
                codePtr++;
                len--;
            }
        }
    }

ConvtOperand:
    /* ConvtOperand */
    //printf("{cen=%d}", len);
    ConvertOperand(mnemonic, &codePtr, &len);

bail:
    if (len > 0)
        printf("{LEN=%d}", len);

    return;
}

/*
 * ConvtOperand
 */
void
ReformatLISA4::ConvertOperand(unsigned char mnemonic,
	const unsigned char** pCodePtr, int* pLen)
{
    /*
     * Address header char.
     */
    static const char kAdrsModeHeader[] = {
        0, 0, 0, 0, 0, 0, 0, 0, 0,      // 0-8 are null
        '(', '(', '(', '(',             // 9-12
        '[', '[',                       // 13-14
        0                               // 15
    };

    /*
     * operand lookup table - 1st char
     *     0 : not simple operand
     *  b7=1 : 1st char of simple operand
     */
    static const char kOperandTbl1[] =
        "+-*/&|^="          // 0-7
        "<>%<><~-"          // 8-F
        "01234567"          //10-17
        "89\0\0\0\0\0\0"    //18-1F
        "++++++++"          //20-27
        "++\0\0\0\0\0\0"    //28-2F
        "\0<>\0\0\0\0\0"    //30-37
        "\0*@#/^|\\"        //38-3F
        "<<<<<<<<"          //40-47
        "<<??????"          //48-4F
        ">>>>>>>>"          //50-57
        ">>????\0?"         //58-5F
        "\0\0\0\0\0\0\0\0"  //60-67
        "\0\0\0\0\0\0\0\0"  //68-6f
        "\0\0\0\0\0\0\0\0"  //70-77
        "\0\0\0\0\0\0\0\0"  //78-7f
    ;

    /*
     * operand lookup table - 2nd char
     *     0 : only 1 char
     *     1 : was unary op
     *  b7=1 : 2nd char of simple operand
     *
     * (Changed numeric 1 to '!'.  Bit 7 never set.  Normally it's set
     * for anything that isn't numeric 0 or 1.)
     */
    static const char kOperandTbl2[] =
        "\0\0\0\0\0\0\0\0"  // 0-7
        "\0\0\0==>!!"       // 8-F      note: 1's mark unaries
        "\0\0\0\0\0\0\0\0"  //10-17
        "\0\0\0\0\0\0\0\0"  //18-1F
        "01234567"          //20-27
        "89\0\0\0\0\0\0"    //28-2F
        "\0<>\0\0\0\0\0"    //30-37
        "\0\0!!!!!!"        //38-3F     note: 1's mark unaries
        "01234567"          //40-47
        "89012345"          //48-4F
        "01234567"          //50-57
        "896789\0#"         //58-5F
        "\0\0\0\0\0\0\0\0"  //60-67
        "\0\0\0\0\0\0\0\0"  //68-6f
        "\0\0\0\0\0\0\0\0"  //70-77
        "\0\0\0\0\0\0\0\0"  //78-7f
    ;

    /*
     * operator lookup table
     *     0 : not operator
     *     1 : complex operator      
     *  b7=1 : 1st char of simple operator
     *
     * (Changed numeric 1 to '!'.  Bit 7 never set.)
     */
    static const char kOperatorTbl1[] =
        "+-*/&|^="          // 0-7
        "<>%<><\0\0"        // 8-F
        "\0\0\0\0\0\0\0\0"  //10-17
        "\0\0\0\0\0\0\0\0"  //18-1F
        "!!!!!!!!"          //20-27
        "!!!!!!!!"          //28-2F
        "\0<>\0\0\0\0\0"    //30-37
        "\0\0\0\0\0\0\0\0"  //38-3F
        "\0\0\0\0\0\0\0\0"  //40-47
        "\0\0\0\0\0\0\0\0"  //48-4F
        "\0\0\0\0\0\0\0\0"  //50-57
        "\0\0\0\0\0\0\0\0"  //58-5F
        "\0\0\0\0\0\0\0\0"  //60-67
        "\0\0\0\0\0\0\0\0"  //68-6f
        "\0\0\0\0\0\0\0\0"  //70-77
        "\0\0\0\0\0\0\0\0"  //78-7f
    ;
    static const char* kOperatorTbl2 = kOperandTbl2;

    static const char* kAdrsModeTrailer[] = {
        NULL, NULL, NULL, ",X",
        ",X", ",X", NULL, ",S",
        ",Y", "),Y", ",X)", ")",
        ",S),Y", "]", "],Y", NULL,
    };


    const unsigned char* codePtr = *pCodePtr;
    int len = *pLen;
    OperandResult result;
    unsigned char adrsMode = 0;
    unsigned char val;
    char ch;

    if (mnemonic == kMacroTKN || mnemonic < kSS) {
        /* ConvtOperand */
        OutputTab(fAdTab);
        if (mnemonic != kMacroTKN) {
            if (mnemonic < kGROUP3_tkns ||
                !(mnemonic < kGROUP5_tkns || mnemonic == kMVN_tkn ||
                  mnemonic == kMVP_tkn || mnemonic >= kGROUP6_tkns))
            {
                if (len <= 0) {
                    Output("!BAD ADRS!");
                } else {
                    adrsMode = *codePtr++;
                    len--;
                }
            }
            if (adrsMode < NELEM(kAdrsModeHeader)) {
                ch = kAdrsModeHeader[adrsMode];
                if (ch != 0)
                    Output(ch);
            } else {
                Output("!BAD ADRSMODE!");
            }
        }
        //printf("{ven=%d val=0x%02x}", len, *codePtr);

        /* OutOprnd */
        while (len > 0) {
            bool doOutOprtr = false;
            val = *codePtr++;
            len--;

            if (val >= 0x80) {
                if (val == kLabelTKN) {
                    /* OutLabel */
                    int idx;
                    idx = *codePtr++;
                    idx |= *codePtr++ << 8;
                    len -= 2;
                    PrintSymEntry(idx);
                    doOutOprtr = true;
                } else if (val == kComntSemiTKN) {
                    break;      // out of while, to OutOprndDone */
                } else {
                    /* illegal token */
                    Output('!');
                    Output(',');
                    /* keep looping in OutOprnd */
                }
            } else {
                /* OutOpr2 */
                ch = kOperandTbl1[val];
                if (ch != '\0') {
                    /* simple operand */
                    Output(ch);
                    ch = kOperandTbl2[val];
                    if (ch == '!')
                        continue;       // unary, no operator, go to OutOprnd
                    else if (ch != '\0')
                        Output(ch);
                    doOutOprtr = true;
                } else {
                    /* OutOprComp - complex operand */
                    result = PrintComplexOperand(val, &codePtr, &len);
                    if (result == kResultGotoOutOprtr)
                        goto OutOprtr;
                    // else continue around in OutOprnd
                }
            }

            if (doOutOprtr) {
OutOprtr:
                unsigned char opr;

                if (!len)
                    break;
                opr = *codePtr++;
                len--;

                if (opr >= 0x80) {
not_operator:
                    if (opr == kComntSemiTKN)
                        break;  // goto OutOprndDone
                    else {
                        /* must be two sequential operands */
                        Output(',');
                        codePtr--;  // back up
                        len++;
                        // continue around to OutOprnd
                    }
                } else {
                    char opch;

                    opch = kOperatorTbl1[opr];
                    if (opch == 0) {
                        goto not_operator;
                    } else if (opch == 0 || opch == '!') {
                        /* complex */
                        Output('+');
                        opch = kOperatorTbl2[opr];
                        //printf("{opch=0x%02x}", opch);
                        if (opch != '\0') {
                            Output(opch);
                            goto OutOprtr;      // look for another
                        } else {
                            int num;
                            num = opr - 0x10;
                            result = PrintNum(num, &codePtr, &len);
                            if (result == kResultGotoOutOprtr)
                                goto OutOprtr;
                        }
                    } else {
                        /* simple */
                        Output(' ');
                        Output(opch);
                        opch = kOperatorTbl2[opr];
                        if (opch != '\0')
                            Output(opch);
                        Output(' ');
                        // continue to OutOprnd
                    }
                }
            }
        }
    }

    /* OutOprndDone */
    if (adrsMode != 0) {
        if (adrsMode < NELEM(kAdrsModeHeader)) {
            if (kAdrsModeTrailer[adrsMode] != NULL)
                Output(kAdrsModeTrailer[adrsMode]);
        } else {
            Output("!BAD ADRSMODE!");
            printf("{ADRS=%d}", adrsMode);
        }
    }

    if (len > 0) {
        OutputTab(fComTab);
        Output(';');
        while (len--)
            Output(*codePtr++ & 0x7f);
    }

//bail:
    *pCodePtr = codePtr;
    *pLen = len;
}

/*
 * CnvrtDec - convert to decimal output.
 */
void
ReformatLISA4::PrintDec(int count, const unsigned char** pCodePtr,
	int* pLen)
{
    const unsigned char* codePtr = *pCodePtr;
    int len = *pLen;
    long val = 0;
    char buf[12];       // 4 bytes, max 10 chars + sign + nul

    for (int i = 0; i < count; i++) {
        val |= *codePtr++ << (8 * i);
        len--;
    }
    sprintf(buf, "%lu", val);
    Output(buf);

    *pCodePtr = codePtr;
    *pLen = len;
}

/*
 * CnvrtHex - convert to hex output.
 */
void
ReformatLISA4::PrintHex(int count, const unsigned char** pCodePtr,
	int* pLen)
{
    const unsigned char* codePtr = *pCodePtr;
    int len = *pLen;
    unsigned char val;

    Output('$');
    for (int i = count-1; i >= 0; i--) {
        val = *(codePtr+i);
        Output(gHexDigit[(val & 0xf0) >> 4]);
        Output(gHexDigit[val & 0x0f]);
    }
    codePtr += count;
    len -= count;

    *pCodePtr = codePtr;
    *pLen = len;
}

/*
 * CnvrtBin - convert to binary output.
 */
void
ReformatLISA4::PrintBin(int count, const unsigned char** pCodePtr,
	int* pLen)
{
    const unsigned char* codePtr = *pCodePtr;
    int len = *pLen;
    unsigned char val;
    char buf[9];

    buf[8] = '\0';

    Output('%');
    for (int i = count-1; i >= 0; i--) {
        val = *(codePtr+i);
        for (int bit = 0; bit < 8; bit++)
            buf[bit] = '0' + ((val >> (7-bit)) & 0x01);
        Output(buf);
    }

    codePtr += count;
    len -= count;

    *pCodePtr = codePtr;
    *pLen = len;
}

/*
 * OUTNUM
 */
ReformatLISA4::OperandResult
ReformatLISA4::PrintNum(unsigned char opr, const unsigned char** pCodePtr,
    int* pLen)
{
    OperandResult result = kResultUnknown;
    const unsigned char* codePtr = *pCodePtr;
    int len = *pLen;
    int idx;

    switch (opr) {
    case kDec3_tkn:
        PrintDec(3, &codePtr, &len);
        break;
    case kDec2_tkn:
        PrintDec(2, &codePtr, &len);
        break;
    case kDec1_tkn:
        PrintDec(1, &codePtr, &len);
        break;
    case kHex3_tkn:
        PrintHex(3, &codePtr, &len);
        break;
    case kHex2_tkn:
        PrintHex(2, &codePtr, &len);
        break;
    case kHex1_tkn:
        PrintHex(1, &codePtr, &len);
        break;
    case kBin3_tkn:
        PrintBin(3, &codePtr, &len);
        break;
    case kBin2_tkn:
        PrintBin(2, &codePtr, &len);
        break;
    case kBin1_tkn:
        PrintBin(1, &codePtr, &len);
        break;
    case kcABS_tkn:
        /* coerce absolute */
        if (*codePtr == kLabelTKN) {
            codePtr++;
            len--;
        }
        idx = *codePtr++;
        idx |= *codePtr++ << 8;
        len -= 2;
        PrintSymEntry(idx);
        Output(':');
        Output('A');
        break;
    case kcLONG_tkn:
        /* coerce long */
        if (*codePtr == kLabelTKN) {
            codePtr++;
            len--;
        }
        idx = *codePtr++;
        idx |= *codePtr++ << 8;
        len -= 2;
        PrintSymEntry(idx);
        Output(':');
        Output('L');
        break;
    case kMacE_tkn:
        /* macro expression */
        Output('?');
        Output(':');
        result = kResultGotoOutOprnd;
        break;
    default:
        if (opr >= kStr31_tkn+1) {
            /* CheckMoreOprnd - none currently */
            // (not expected, but not much we can do)
            Output("{CheckMoreOprnd}");
        } else {
            /* CheckStrings */
            unsigned char strLen;
            unsigned char val;
            unsigned char delimit;

            if ((opr & 0x1f) == 0) {
                strLen = *codePtr++;
                len--;
            } else {
                strLen = opr & 0x1f;
            }
            if (strLen > len) {
                Output("!BAD STR!");
                printf("{opr=0x%02x, strLen=%d, len=%d}", opr, strLen, len);
                return kResultFailed;
            }
            val = *codePtr;
            if (val < 0x80) {
                /* ISAPOST */
                delimit = '\'';
            } else {
                /* DETKNSTR */
                delimit = '\"';
            }
            Output(delimit);
            while (strLen--) {
                val = *codePtr++ & 0x7f;
                len--;

                Output(val);
                if (val == delimit)
                    Output(val);
            }
            Output(delimit);
        }
        break;
    }

    if (result == kResultUnknown)
        result = kResultGotoOutOprtr;

    *pCodePtr = codePtr;
    *pLen = len;
    return result;
}

/*
 * OutOprComp
 */
ReformatLISA4::OperandResult
ReformatLISA4::PrintComplexOperand(unsigned char opr,
    const unsigned char** pCodePtr, int* pLen)
{
    if (opr != kBign_tkn)
        return PrintNum(opr, pCodePtr, pLen);

/*
    const unsigned char* codePtr = *pCodePtr;
    int len = *pLen;
    *pCodePtr = codePtr;
    *pLen = len;
*/

    unsigned char subClass;

    /* OutOprComp */
    subClass = *(*pCodePtr)++;
    (*pLen)--;
    if (subClass == kBigndec4_tkn) {
        PrintDec(4, pCodePtr, pLen);
    } else if (subClass == kBignhex4_tkn) {
        PrintHex(4, pCodePtr, pLen);
    } else if (subClass == kBignbin4_tkn) {
        PrintBin(4, pCodePtr, pLen);
    } else if (subClass == kBignhexs_tkn) {
        /* hex string, for HEX pseudo-op */
        unsigned char hexLen = *(*pCodePtr)++;
        (*pLen)--;
        if (hexLen > *pLen) {
            Output("!BAD HEX!");
            return kResultFailed;
        }
        while (hexLen--) {
            unsigned char val = *(*pCodePtr)++;
            (*pLen)--;
            Output(gHexDigit[(val & 0xf0) >> 4]);
            Output(gHexDigit[val & 0x0f]);
        }
    } else if (subClass == kBignstring_tkn) {
        /* undelimited string */
        unsigned char strLen = *(*pCodePtr)++;
        (*pLen)--;
        if (strLen > *pLen) {
            Output("!BAD USTR!");
            return kResultFailed;
        }
        while (strLen--) {
            unsigned char val = *(*pCodePtr)++;
            (*pLen)--;
            Output(val & 0x7f);
        }
    } else {
        Output("!BAD CPLX OPRND!");
		DebugBreak();
        printf("OPR=%d SUBCLASS=%d", opr, subClass);
        return kResultFailed;
    }

    return kResultGotoOutOprtr;
}

/*
 * Print symbol table entry.
 */
void
ReformatLISA4::PrintSymEntry(int ent)
{
    if (ent < 0 || ent >= fSymCount) {
        Output("!BAD SYM!");
        return;
    }

    const unsigned char* str = fSymTab[ent];

    unsigned char uc;
    str++;
    while (1) {
        uc = *str++;
        if (!uc)
            break;
        else if (uc < 0x80)
            uc |= 0x20;
        Output(uc & 0x7f);
    }
}