ciderpress/reformat/Asm.cpp

/*
 * CiderPress
 * Copyright (C) 2007 by faddenSoft, LLC.  All Rights Reserved.
 * See the file LICENSE for distribution terms.
 */
/*
 * Convert assembly source code.
 *
 * S-C Assembler, LISA, and Merlin-8 are handled here.  Others, such as
 * Orca/M, are either plain text or close enough that the "converted text"
 * code handles it well enough.
 */
#include "StdAfx.h"
#include "Asm.h"


/*
 * ===========================================================================
 *      S-C Assembler
 * ===========================================================================
 */

/*
 * S-C Assembler file format (thanks to Paul Schlyter, pausch at saaf.se):
 *
 *  <16-bit file length>  [DOS 3.3 only]
 *  <line> ...
 *
 * Each line consists of:
 *  <8-bit line length>
 *  <16-bit line number>
 *  <characters> ...
 *  <end-of-line token ($00)>
 *
 * Characters may be:
 *  $00-$1f: invalid
 *  $20-$7f: literal character
 *  $80-$bf: compressed spaces (0 to 63 count)
 *  $c0    : RLE token ($c0 <n> <ch> == repeat <ch> for <n> times)
 *  $c1-$ff: invalid
 *
 * There is no end-of-file marker.
 */

/*
 * Report how applicable the S-C Assembler reformatter is to this file.
 *
 * Only files of type INT with an aux type of zero are candidates.  Among
 * those, a file whose first line passes IsSCAssem() is rated "yes" and
 * anything else is rated "maybe".  All other files are rated "not".
 */
void
ReformatSCAssem::Examine(ReformatHolder* pHolder)
{
    /* wrong file type or aux type: not S-C assembler */
    if (pHolder->GetFileType() != kTypeINT || pHolder->GetAuxType() != 0) {
        pHolder->SetApplic(ReformatHolder::kReformatSCAssem,
            ReformatHolder::kApplicNot,
            ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
        return;
    }

    /* right type, but the first line didn't check out: possibly S-C */
    if (!ReformatSCAssem::IsSCAssem(pHolder)) {
        pHolder->SetApplic(ReformatHolder::kReformatSCAssem,
            ReformatHolder::kApplicMaybe,
            ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
        return;
    }

    /* definitely S-C assembler */
    pHolder->SetApplic(ReformatHolder::kReformatSCAssem,
        ReformatHolder::kApplicYes,
        ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
}

/*
 * Figure out if a type 'I' file is an Integer BASIC program or an S-C
 * assembler listing.
 *
 * They both have a line length and line number, but use different conventions
 * for marking the end of a line, and have different sets of valid chars.  We
 * don't need to fully validate the file, just test the first line.
 *
 * The first byte is taken as the length of the first line, and the byte at
 * the end of that line is examined: 0x00 means S-C assembler, 0x01 means
 * Integer BASIC.
 *
 * Returns "true" only for S-C assembler.  Empty files, a zero line length,
 * a line length that exceeds the file, and any other terminator value all
 * return "false".
 */
/*static*/ bool
ReformatSCAssem::IsSCAssem(const ReformatHolder* pHolder)
{
    const unsigned char* ptr = pHolder->GetSourceBuf(ReformatHolder::kPartData);
    long srcLen = pHolder->GetSourceLen(ReformatHolder::kPartData);
    int len;

    /* nothing to look at; don't read the length byte from an empty buffer */
    if (!ptr || srcLen <= 0)
        return false;

    len = *ptr;
    if (len == 0 || len > srcLen)
        return false;       // should return an error, really

    /* 1 <= len <= srcLen here, so ptr[len-1] is inside the buffer */
    if (ptr[len-1] == 0x00) {
        WMSG0("  Found 0x00, looks like S-C assembler\n");
        return true;
    } else if (ptr[len-1] == 0x01) {
        WMSG0("  Found 0x01, looks like Integer BASIC\n");
        return false;
    } else {
        WMSG1("  Got strange value 0x%02x during S-C test\n", ptr[len-1]);
        return false;       // again, should return an error
    }
}


/*
 * Reformat an S-C Assembler listing into text.  I don't know exactly what the
 * original listings looked like, so I'm just doing what A2FID.C does.
 *
 * Each source line is emitted as a four-digit line number, a space, and the
 * expanded line contents:
 *  $20-$7f are copied as-is
 *  $80-$bf expand to (value - $80) spaces
 *  $c0 <n> <ch> expands to <n> copies of <ch>
 *  anything else is shown as '?'
 *
 * Conversion stops at a zero-length line or when the data runs out; all
 * reads are checked against the remaining length so a truncated file cannot
 * cause a read past the end of the source buffer.
 *
 * Always stores the result in "pOutput" and returns 0.
 */
int
ReformatSCAssem::Process(const ReformatHolder* pHolder,
    ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
    ReformatOutput* pOutput)
{
    const unsigned char* srcPtr = pHolder->GetSourceBuf(part);
    long length = pHolder->GetSourceLen(part);
    // (this was written before tab stuff in ReformatAsm class existed)
    static const char* kSpaces64 =  "                                "
                                    "                                ";
    int retval = -1;

    fUseRTF = false;

    RTFBegin();

    /*
     * Make sure there's enough here to get started.  We want to return an
     * "okay" result because we want this treated like a reformatted empty
     * BASIC program rather than a non-Integer file.
     */
    if (length < 2) {
        WMSG0("  SCAssem truncated?\n");
        BufPrintf("\r\n");
        goto done;
    }

    while (length > 0) {
        unsigned char lineLen;
        unsigned short lineNum;

        /* pull the length byte, which we sanity-check */
        lineLen = *srcPtr++;
        length--;
        if (lineLen == 0) {
            WMSG0("  SCAssem found zero-length line?\n");
            break;
        }

        /* line number; both bytes must be present */
        if (length < 2) {
            WMSG0("  SCAssem truncated line number\n");
            break;
        }
        lineNum = Read16(&srcPtr, &length);
        BufPrintf("%04u ", lineNum);

        /* test the remaining length first so we never read past the end */
        while (length > 0 && *srcPtr != 0x00) {
            if (*srcPtr >= 0x20 && *srcPtr <= 0x7f) {
                BufPrintf("%c", *srcPtr);
            } else if (*srcPtr >= 0x80 && *srcPtr <= 0xbf) {
                /* index from the end of the string to get (value-0x80) spaces */
                BufPrintf("%s", kSpaces64 + (64+128 - *srcPtr));
            } else if (*srcPtr == 0xc0) {
                /* need the token plus the count and character bytes */
                if (length > 2) {
                    int count = *(srcPtr+1);
                    unsigned char ch = *(srcPtr+2);

                    srcPtr += 2;
                    length -= 2;
                    while (count--)
                        BufPrintf("%c", ch);
                } else {
                    WMSG1("  SCAssem GLITCH: RLE but only %ld chars left\n",
                        length);
                    BufPrintf("?!?");
                }
            } else {
                WMSG1("  SCAssem invalid char 0x%02x\n", *srcPtr);
                BufPrintf("?");
            }

            srcPtr++;
            length--;
        }

        /* skip past EOL token, unless we simply ran out of data */
        if (length > 0) {
            ASSERT(*srcPtr == 0x00);
            srcPtr++;
            length--;
        }

        RTFNewPara();
    }

done:
    RTFEnd();

    SetResultBuffer(pOutput);
    retval = 0;

//bail:
    return retval;
}


/*
 * ===========================================================================
 *      Merlin 8 and Merlin 8/16 Assembler
 * ===========================================================================
 */

/*
 * Merlin source code uses ordinary text files that usually have names
 * ending in ".S".  They use high ASCII text -- unusual for ProDOS text
 * files -- with the occasional low-ASCII space character.
 *
 * We don't absolutely need this conversion, because the files are already
 * plain text, but it's easier to read when the various pieces are tabbed
 * to reasonable screen offsets.
 *
 * The 0xa0 values seem to be used to separate pieces, while the 0x20
 * values are used for comments and other filler.  It is entirely possible
 * to have a Merlin source file with no 0x20 values.
 */

/*
 * Report how applicable the Merlin reformatter is to this file.  Only
 * files of type TXT are considered; the aux type is not examined.
 *
 * The rating combines two signals, the content test in IsMerlin() and
 * whether the filename extension is ".S" (case-insensitive):
 *  both            -> yes
 *  content only    -> probably
 *  extension only  -> probably not (offered as a non-default option)
 *  neither         -> not
 *
 * If we really just want Merlin we should probably exclude DOS disks,
 * since the text file contents will match.  However, it's probably useful
 * to support DOS ED/ASM sources with this.
 */
void
ReformatMerlin::Examine(ReformatHolder* pHolder)
{
    if (pHolder->GetFileType() != kTypeTXT) {
        /* not a text file, so not Merlin */
        pHolder->SetApplic(ReformatHolder::kReformatMerlin,
            ReformatHolder::kApplicNot,
            ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
        return;
    }

    bool looksLikeAsm = ReformatMerlin::IsMerlin(pHolder);
    bool hasDotS = strcasecmp(pHolder->GetNameExt(), ".S") == 0;

    if (looksLikeAsm) {
        if (hasDotS) {
            /* gotta be */
            pHolder->SetApplic(ReformatHolder::kReformatMerlin,
                ReformatHolder::kApplicYes,
                ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
        } else {
            /* probably Merlin assembler, or at least *some* sort of asm */
            pHolder->SetApplic(ReformatHolder::kReformatMerlin,
                ReformatHolder::kApplicProbably,
                ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
        }
    } else {
        if (hasDotS) {
            /* not likely, but offer it as non-default option */
            pHolder->SetApplic(ReformatHolder::kReformatMerlin,
                ReformatHolder::kApplicProbablyNot,
                ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
        } else {
            /* probably not Merlin, don't allow */
            pHolder->SetApplic(ReformatHolder::kReformatMerlin,
                ReformatHolder::kApplicNot,
                ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
        }
    }
}

/*
 * Figure out if the contents of this file match up with our expectations
 * for Merlin source code.
 *
 * Every byte must be either high ASCII (bit 7 set) or a plain 0x20; any
 * other value rejects the file immediately.  While scanning we count the
 * lines (terminated by 0x8d), the lines that begin with exactly one space
 * (0x20 or 0xa0 followed by a non-space), and the lines that begin with
 * the comment token ('*', 0xaa).
 *
 * Typical source files start with a space on 40-60% of lines, but "equates"
 * files and files that are substantially comments break the rule.  The file
 * is accepted if the space-line percentage is above 40, or if space lines
 * plus comment lines together are above 50.  Percentages are computed with
 * integer division.
 *
 * This will also return "true" for DOS ED/ASM files.
 */
/*static*/ bool
ReformatMerlin::IsMerlin(const ReformatHolder* pHolder)
{
    const unsigned char* cur = pHolder->GetSourceBuf(ReformatHolder::kPartData);
    long remaining = pHolder->GetSourceLen(ReformatHolder::kPartData);

    int numLines = 0;
    int numSpaceLines = 0;
    int numCommentLines = 0;
    bool atLineStart = true;

    for ( ; remaining != 0; cur++) {
        /* "remaining" now counts the bytes that follow the current one */
        remaining--;

        const unsigned char ch = *cur;

        if ((ch & 0x80) == 0 && (ch != 0x20)) {
            WMSG1("  Merlin: not, found 0x%02x\n", ch);
            return false;
        }

        if (atLineStart) {
            numLines++;

            /* one leading space, then something that isn't a space */
            if ((ch & 0x7f) == 0x20 && remaining != 0 &&
                (cur[1] & 0x7f) != 0x20)
            {
                numSpaceLines++;
            }
            if (ch == 0xaa)         // '*'
                numCommentLines++;
        }

        /* the byte after a high-ASCII CR begins a new line */
        atLineStart = (ch == 0x8d);
    }

    if (numLines == 0)
        return false;       // don't divide by zero

    WMSG1("  Merlin: found %d lines\n", numLines);
    WMSG4("    %d start with spaces (%.3f%%), %d with comments (%.3f%%)\n",
        numSpaceLines, (numSpaceLines * 100.0) / numLines,
        numCommentLines, (numCommentLines * 100.0) / numLines);

    const int spacePct = (numSpaceLines * 100) / numLines;
    const int spaceOrCommentPct =
        ((numSpaceLines + numCommentLines) * 100) / numLines;

    return spacePct > 40 || spaceOrCommentPct > 50;
}


/*
 * Re-tab a Merlin assembly file.
 *
 * We try to track quoted material on the operand field to avoid tabbing
 * parts of quoted text around.  This isn't strictly necessary for a well-
 * formed Merlin file, which uses 0x20 as a "non-breaking space", but if it
 * has been "washed" through a converter or if this is actually a DOS ED/ASM
 * file, tracking quotes is almost always beneficial.
 */
int
ReformatMerlin::Process(const ReformatHolder* pHolder,
    ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
    ReformatOutput* pOutput)
{
    const unsigned char* srcPtr = pHolder->GetSourceBuf(part);
    long srcLen = pHolder->GetSourceLen(part);
    long length = srcLen;
    int retval = -1;
    enum { kStateLabel, kStateMnemonic, kStateOperand, kStateComment };
    int tabStop[] = { 0, 9, 15, 26 };   // 1:1 map with state enum
    int state;
    unsigned char quoteChar = '\0';

    fUseRTF = false;

    RTFBegin();

    bool isLineStart = true;
    for ( ; srcLen > 0; srcLen--, srcPtr++) {
        if (isLineStart) {
            isLineStart = false;
            OutputStart();      // begin new line in output buffer
            state = kStateLabel;
            if (*srcPtr == 0xaa)
                state = kStateComment;
        }
        if (*srcPtr == 0x8d) {
            OutputFinish();     // end of line

            BufPrintf("%s", GetOutBuf());
            RTFNewPara();

            isLineStart = true;
            if (quoteChar != '\0') {
                DebugBreak();
                quoteChar = '\0';
            }
            continue;
        }

        if (state >= kStateComment) {
            Output(*srcPtr & 0x7f);
        } else if (quoteChar != '\0') {
            if (*srcPtr == quoteChar) {
                /* close quote */
                quoteChar = '\0';
            }
            Output(*srcPtr & 0x7f);
        } else if (state == kStateOperand &&
                   (*srcPtr == '\'' + 0x80 || *srcPtr == '"' + 0x80))
        {
            /* open quote */
            quoteChar = *srcPtr;
            Output(quoteChar & 0x7f);
        } else if (*srcPtr == 0xa0) {       // high-ASCII space
            // does not trigger on 0x20; this matches behavior of
            // Merlin-16 v3.40
            state++;
            OutputTab(tabStop[state]);
        } else if (*srcPtr == 0xbb) {       // high-ASCII ';'
            // just comment, or comment on mnemonic w/o operand
            // (shouldn't tab out if line started with label but
            // contains 0x20s instead of 0xa0s between components;
            // oh well.)
            state = kStateComment;
            OutputTab(tabStop[state]);
            Output(*srcPtr & 0x7f);
        } else {
            Output(*srcPtr & 0x7f);
        }
    }

//done:
    RTFEnd();

    SetResultBuffer(pOutput);
    retval = 0;

//bail:
    return retval;
}


/*
 * ===========================================================================
 *      LISA Assembler - v2.x
 * ===========================================================================
 */

/*
 * This is for LISA v2.5 and earlier, which ran under DOS 3.3.  It used a
 * fairly simple format with tokenized mnemonics.
 *
 * The conversion was created by examination of the source files.  The table
 * of mnemonics was extracted from the assembler binary. 
 */

/*
 * Report how applicable the LISA v2 reformatter is to this file.
 *
 * Only DOS-format sources with the DOS 'B' file type are candidates.  A
 * candidate that passes IsLISA() is rated "yes", otherwise "maybe".  All
 * other files are rated "not".
 */
void
ReformatLISA2::Examine(ReformatHolder* pHolder)
{
    /* wrong source format or file type: not LISA */
    if (pHolder->GetSourceFormat() != ReformatHolder::kSourceFormatDOS ||
        pHolder->GetFileType() != kTypeDOS_B)
    {
        pHolder->SetApplic(ReformatHolder::kReformatLISA2,
            ReformatHolder::kApplicNot,
            ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
        return;
    }

    /* right kind of file, but the contents look off: maybe LISA */
    if (!ReformatLISA2::IsLISA(pHolder)) {
        pHolder->SetApplic(ReformatHolder::kReformatLISA2,
            ReformatHolder::kApplicMaybe,
            ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
        return;
    }

    /* definitely LISA */
    pHolder->SetApplic(ReformatHolder::kReformatLISA2,
        ReformatHolder::kApplicYes,
        ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
}

/*
 * Quick sanity check on the file contents.
 *
 * The file must hold at least 8 bytes.  Two 16-bit values are read from
 * the start (a version word, which is ignored, and a length word), and
 * the length word must not exceed the byte count left after those reads.
 */
bool
ReformatLISA2::IsLISA(const ReformatHolder* pHolder)
{
    const unsigned char* cursor = pHolder->GetSourceBuf(ReformatHolder::kPartData);
    long remaining = pHolder->GetSourceLen(ReformatHolder::kPartData);

    if (remaining < 8)
        return false;

    /* presumably Read16 advances the pointer and shrinks the count */
    unsigned short version = Read16(&cursor, &remaining);
    unsigned short declaredLen = Read16(&cursor, &remaining);
    (void) version;     // read only to step past it

    return !(declaredLen > remaining);
}


/*
 * Opcode mnemonics.
 *
 * A flat table of three-character entries, eight per row, covering token
 * values 0x80 through 0xff.  ReformatLISA2::ProcessLine() finds the entry
 * for a token at offset (token - 128) * 3 and outputs its three characters.
 *
 * Entries shown as "___" are presumably tokens with no assigned mnemonic;
 * verify against the assembler binary before relying on that.
 */
static const char gOpcodes[] = 
    "BGEBLTBMIBCCBCSBPLBNEBEQ"      // 80-87
    "BVSBVCBSBBNMBM1BNZBIZBIM"      // 88-8f
    "BIPBICBNCBRABTRBFLBRKBKS"      // 90-97
    "CLVCLCCLDCLIDEXDEYINXINY"      // 98-9f
    "NOPPHAPLAPHPPLPRTSRTIRSB"      // a0-a7
    "RTNSECSEISEDTAXTAYTSXTXA"      // a8-af
    "TXSTYAADDCPRDCRINRSUBLDD"      // b0-b7
    "POPPPDSTDSTPLDRSTOSET___"      // b8-bf
    "ADCANDORABITCMPCPXCPYDEC"      // c0-c7
    "EORINCJMPJSR___LDALDXLDY"      // c8-cf
    "STASTXSTYXORLSRRORROLASL"      // d0-d7
    "ADREQUORGOBJEPZSTRDCMASC"      // d8-df
    "ICLENDLSTNLSHEXBYTHBYPAU"      // e0-e7
    "DFSDCI...PAGINVBLKDBYTTL"      // e8-ef
    "SBC___LET.IF.EL.FI=  PHS"      // f0-f7
    "DPH.DAGENNOGUSR_________"      // f8-ff
    ;


/*
 * Format:
 *  2-byte version (?)
 *  2-byte length
 *  <LINE> ...
 *
 * Each line is:
 *  1-byte length
 *  <DATA>
 *  CR
 *
 * Last line has length=255.
 */

/*
 * Parse a file.
 *
 * Reads the 16-bit version and 16-bit length from the header, then walks
 * the lines that follow.  Each line starts with a length byte; the line is
 * expanded by ProcessLine() and written out followed by CR/LF.  Conversion
 * stops at a zero length byte, at the end-of-file marker (length 255), when
 * the declared length is used up, or when a line claims more bytes than
 * remain.
 *
 * Returns 0 on success with the result stored in "pOutput".  Returns -1
 * without producing output if the file is shorter than 8 bytes or the
 * header length exceeds the data that follows the header.
 */
int
ReformatLISA2::Process(const ReformatHolder* pHolder,
    ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
    ReformatOutput* pOutput)
{
    const unsigned char* srcPtr = pHolder->GetSourceBuf(part);
    long srcLen = pHolder->GetSourceLen(part);
    long actualLen;
    int retval = -1;

    fUseRTF = false;

    if (srcLen < 8) {
        WMSG0("  LISA truncated?\n");
        goto bail;
    }

    /* (declared without initializers because "goto bail" jumps past them) */
    unsigned short version;

    version = Read16(&srcPtr, &srcLen);     // usually 0x1800; maybe "2.4"?
    actualLen = Read16(&srcPtr, &srcLen);

    WMSG2("  LISA version 0x%04x, len=%ld\n", version, actualLen);

    if (actualLen > srcLen) {
        WMSG2("  LISA bad length (len=%ld actual=%ld)\n", srcLen, actualLen);
        goto bail;
    }

    int lineNum;
    lineNum = 0;
    while (actualLen > 0) {
        int lineLen = *srcPtr;
        if (lineLen == 0) {
            WMSG1("  LISA bad line len (%d)\n", lineLen);
            break;
        } else if (lineLen == 255) {
            // used as end-of-file marker
            break;
        }

        /* the length byte plus the line must fit in what's left */
        if (lineLen + 1 > actualLen) {
            WMSG2("  LISA line overruns data (lineLen=%d remaining=%ld)\n",
                lineLen, actualLen);
            break;
        }

        lineNum++;

        OutputStart();
        ProcessLine(srcPtr);
        OutputFinish();

        //BufPrintf("%4d %s\r\n", lineNum, GetOutBuf());
        BufPrintf("%s\r\n", GetOutBuf());

        srcPtr += lineLen+1;
        actualLen -= lineLen+1;
    }

    SetResultBuffer(pOutput);
    retval = 0;

bail:
    return retval;
}

/*
 * Expand one tokenized LISA v2 source line into the output line buffer.
 *
 * "buf" points at the line's length byte.  One less than that many bytes
 * following the length byte are processed.
 *
 * A line that starts with a byte >= 0x80 has no label, so we tab straight
 * to the opcode column.  Otherwise, unless the line starts with ';' or is
 * too short, the first 8 bytes are a label (output stops at the first
 * space), optionally followed by ':'.
 *
 * In the remainder of the line, printable characters are copied, the first
 * byte >= 0x80 is expanded to its three-letter mnemonic from gOpcodes, and
 * a later byte >= 0x80 starts the comment field.
 */
void
ReformatLISA2::ProcessLine(const unsigned char* buf)
{
    int len = *buf;
    unsigned char uch;

    // consume length byte
    buf++;
    len--;

    if (*buf >= 0x80) {
        // starting the opcode, tab past label field
        OutputTab(kOpTab);
    } else if (*buf != ';' && len > 8) {
        // starting with 8-character label
        bool doPrint = true;
        for (int i = 0; i < 8; i++) {
            uch = *buf;
            if (uch < 0x20 || uch >= 0x80) {
                WMSG1("  LISA funky char 0x%02x in label\n", uch);
                break;
            } else if (uch == 0x20) {
                // label is space-padded; stop printing at the first space
                doPrint = false;
            }
            if (doPrint)
                Output(uch);
            buf++;
            len--;
        }
        if (len > 0 && *buf == ':') {
            Output(*buf);
            buf++;
            len--;
        }
        OutputTab(kOpTab);
    }

    bool mnemonicDone = false;
    bool operandDone = false;
    while (len--) {
        uch = *buf++;

        if (uch == 0x0d) {
            // don't output CR to line buf
            // (checked ahead of the general "< 0x20" case, which would
            // otherwise swallow it and make this warning unreachable)
            if (len) {
                WMSG0("WARNING: got early CR\n");
            }
        } else if (uch >= 0x20 && uch < 0x80) {
            if (mnemonicDone && uch != 0x20)
                operandDone = true;
            if (mnemonicDone && !operandDone && uch == 0x20) {
                // suppress extra spaces between mnemonic and operand
            } else
                Output(uch);
        } else if (uch < 0x20) {
            // Values from 0x01 - 0x05 are used to separate the opcode from
            // the operand, and seem to "hint" the operand type (immediate,
            // absolute, etc).  Just ignore for now.
        } else if (mnemonicDone) {
            // Values >= 0x80 are mnemonics, but we've already seen it.
            // LISA seems to use 0xbb to separate operand and comment field
            // (would be "STP" mnemonic).  I don't see other uses, so I'm
            // just going to tab over instead of outputing a second
            // mnemonic value.
            if (len > 1) {
                OutputTab(kComTab);
                Output(';');
            }
        } else {
            const char* mnemonic;

            // three characters per entry, first entry is token 0x80
            mnemonic = &gOpcodes[(uch - 128) * 3];
            Output(mnemonic[0]);
            Output(mnemonic[1]);
            Output(mnemonic[2]);
            OutputTab(kAdTab);
            mnemonicDone = true;
        }
    }
}


/*
 * ===========================================================================
 *      LISA Assembler - v4 and v5
 * ===========================================================================
 */

/*
 * The ProDOS version of LISA uses the INT filetype with the assembler
 * version number in the aux type.  The version is always < $4000.
 *
 * The file format looks like this:
 *  4-byte header
 *  symbol dictionary, 8 bytes per symbol
 *  <line> ...
 *
 * The way the lines are decoded is fairly involved.  The code here was
 * developed from the LISA v3.2a sources, as found on the A2ROMulan CD-ROM.
 */

/*
 * Opcode mnemonics.
 *
 * A flat table of 256 three-character entries (plus the terminating NUL).
 * Entries are packed back to back, so the indentation of the literals
 * below only serves to show where each group picks up within its row of
 * eight.  Entries shown as "???" fill the ranges that have no mnemonic
 * listed.
 */
static const char gMnemonics3[256*3 +1] =
    // 0x00 (SN, M65.2) - Group 1 instructions
    "addadcandcmpeorldaorasbc"
    "stasubxor"
    // 0x0b - Group 2 instructions
             "asldecinclsrrol"
    "ror"
    // 0x11 - Group 3 instructions
       ".ifwhlbrabccbcsbeqbfl"
    "bgebltbmibnebplbtrbvcbvs"
    "jsrobjorgphs"
    // 0x24 - Group 4 instructions
                ".mdfzrinplcl"
    "rls"
    // 0x29 - Group 5 instructions
       "bitcpxcpyjmpldxldystx"
    "stytrbtsbstz"
    // 0x34 - Group 6 instructions
                "=  conepzequ"
    "set"
    // 0x39 - Group 7 instructions
       ".daadrbytcspdbyhby"
    // 0x3f - Group 8 instructions
                         "anx"
    "sbtttlchnblkdciinvrvsmsg"
    "strzro"
    // 0x4a - Group 9 instructions
          "dfshexusrsav"
    //M65LEN2  equ      * - M65.2
                      "??????"      // 0x4e-0x4f
    "????????????????????????"      // 0x50-0x57
    "????????????????????????"      // 0x58-0x5f
    "????????????????????????"      // 0x60-0x67
    "????????????????????????"      // 0x68-0x6f
    "????????????????????????"      // 0x70-0x77
    "????????????????????????"      // 0x78-0x7f
    "????????????????????????"      // 0x80-0x87
    "????????????????????????"      // 0x88-0x8f
    "????????????????????????"      // 0x90-0x97
    "????????????????????????"      // 0x98-0x9f
    "????????????????????????"      // 0xa0-0xa7
    "????????????????????????"      // 0xa8-0xaf

    // 0xb0 (SS M65.1) - assembler directives
    ".el.fi.me.wedphif1if2end"
    "expgenlstnlsnognoxpagpau"
    "nlccnd   "
    // 0xc2 - Single-byte instructions
    // NOTE(review): counting entries, the blank "   " above sits at 0xc2
    // and "asl" lands at 0xc3 -- confirm which is intended.
             "asllsrrolrordec"
    "incbrkclccldcliclvdexdey"
    "inxinynopphaphpplaplprti"
    "rtssecsedseitaxtaytsxtxa"
    "txstyaphxphyplxply"
    //M65LEN1  equ      * - M65.1

                      "??????"      // 0xe6-0xe7
    "????????????????????????"      // 0xe8-0xef
    "????????????????????????"      // 0xf0-0xf7
    "????????????????????????"      // 0xf8-0xff
    ;

/*
 * Report how applicable the LISA v3 reformatter is to this file.
 *
 * Candidates are INT files whose aux type is below $4000.  A candidate
 * that passes IsLISA() is rated "yes", otherwise "maybe".  All other
 * files are rated "not".
 */
void
ReformatLISA3::Examine(ReformatHolder* pHolder)
{
    /*
     * Note we cannot false-positive on an INT file on a DOS disk, because
     * in DOS 3.3 INT files always have zero aux type.
     */
    if (pHolder->GetFileType() != kTypeINT ||
        pHolder->GetAuxType() >= 0x4000)
    {
        /* not LISA */
        pHolder->SetApplic(ReformatHolder::kReformatLISA3,
            ReformatHolder::kApplicNot,
            ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
        return;
    }

    if (!ReformatLISA3::IsLISA(pHolder)) {
        /* possibly LISA */
        pHolder->SetApplic(ReformatHolder::kReformatLISA3,
            ReformatHolder::kApplicMaybe,
            ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
        return;
    }

    /* definitely LISA */
    pHolder->SetApplic(ReformatHolder::kReformatLISA3,
        ReformatHolder::kApplicYes,
        ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
}

/*
 * Decide if this is one of ours or perhaps an Integer BASIC or S-C
 * assembler source.
 *
 * Files from DOS-format sources are rejected outright.  Otherwise the
 * first four bytes are read as two little-endian 16-bit values, the code
 * length and the symbol table length, and these must be plausible:
 *  - symbol length is a multiple of 4, at most 512*8, and fits in the file
 *  - code length fits in the file
 *  - header + symbols + code together fit in the file
 *
 * NOTE(review): symbols are described as 8 bytes each, yet the alignment
 * test only requires a multiple of 4 -- confirm that is intentional.
 */
/*static*/ bool
ReformatLISA3::IsLISA(const ReformatHolder* pHolder)
{
    const unsigned char* srcPtr = pHolder->GetSourceBuf(ReformatHolder::kPartData);
    long srcLen = pHolder->GetSourceLen(ReformatHolder::kPartData);

    if (pHolder->GetSourceFormat() == ReformatHolder::kSourceFormatDOS)
        return false;       // can only live under ProDOS; need len + aux type

    if (srcLen < kHeaderLen+2)
        return false;       // too short

    unsigned short codeLen, symLen;

    codeLen = srcPtr[0x00] | srcPtr[0x01] << 8;
    symLen = srcPtr[0x02] | srcPtr[0x03] << 8;

    if ((symLen & 0x0003) != 0 || symLen > 512*8 || symLen > srcLen) {
        WMSG0("  LISA3 bad symLen\n");
        return false;
    }
    if (codeLen > srcLen) {
        WMSG0("  LISA3 funky codeLen\n");
        return false;
    }
    if (codeLen + symLen + kHeaderLen > srcLen) {
        WMSG0("  LISA3 bad combined len\n");
        return false;
    }

    return true;
}

/*
 * Parse a file.
 */
int
ReformatLISA3::Process(const ReformatHolder* pHolder,
    ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
    ReformatOutput* pOutput)
{
    const unsigned char* srcPtr = pHolder->GetSourceBuf(part);
    long srcLen = pHolder->GetSourceLen(part);
    int retval = -1;

    if (srcLen < kHeaderLen+2) {
        WMSG0("  LISA3 too short\n");
        goto bail;
    }

    fUseRTF = false;

    unsigned short codeLen, symLen;

    codeLen = srcPtr[0x00] | srcPtr[0x01] << 8;
    symLen = srcPtr[0x02] | srcPtr[0x03] << 8;

    printf("codeLen=%d, symLen=%d\n", codeLen, symLen);

    if ((symLen & 0x0003) != 0 || symLen > 512*8 || symLen > srcLen) {
        WMSG0("  LISA3 bad symLen\n");
        goto bail;
    }
    if (codeLen > srcLen) {
        WMSG0("  LISA3 funky codeLen\n");
        goto bail;
    }
    if (codeLen + symLen + kHeaderLen > srcLen) {
        WMSG0("  LISA3 bad combined len\n");
        goto bail;
    }

    fSymCount = symLen / 8;
    fSymTab = srcPtr + kHeaderLen;
#if 0
    int ii;
    for (ii = 0; ii < fSymCount; ii++) {
        OutputStart();
        PrintSymEntry(ii);
        OutputFinish();
        WMSG2("%d: %hs\n", ii, GetOutBuf());
    }
#endif

    /*
     * Do stuff with source lines.
     */
    const unsigned char* codePtr;
    const unsigned char* endPtr;
    int lineNum;

    codePtr = srcPtr + kHeaderLen + symLen;
    endPtr = srcPtr + srcLen;
    assert(codePtr < endPtr);
    lineNum = 0;

    while (codePtr < endPtr) {
        unsigned char flagByte;
        int lineLen;

        OutputStart();
        lineNum++;

#if 0
        {
            char offbuf[12];
            sprintf(offbuf, "0x%04x", codePtr - srcPtr);
            Output(offbuf);
            Output('-');
        }
#endif

        flagByte = *codePtr++;
        if (flagByte < 0x80) {
            /* BIGONE - explicit length, complex line */
            lineLen = flagByte;
            /* subtract 1 from flagByte, because len includes flagByte */
            if (flagByte > 0)
                ProcessLine(codePtr, flagByte-1);
        } else {
            /* SPCLCASE - locals, labels, comments */
            if (flagByte >= kLCLTKN) {
                lineLen = 1;
                if (flagByte == kCMNTTKN+1) {
                    Output(';');
                } else if (flagByte == kCMNTTKN) {
                    Output('*');
                } else if (flagByte < kLBLTKN) {
                    /* CNVRTLCL: 0xf0 - 0xf9 - local numeric labels */
                    Output('^');
                    Output('0' + flagByte - 0xf0);
                    Output(':');
                } else if (flagByte < kMACTKN) {
                    /* normal label; 0xfb means add 256 */
                    int idx;
                    idx = *codePtr | (flagByte & 0x01) << 8;
                    PrintSymEntry(idx);
                    Output(':');
                    lineLen = 2;
                } else {
                    /* macro (only object on line) */
                    assert(flagByte == kMACTKN || flagByte == kMACTKN+1);
                    OutputTab(kOpTab);
                    int idx;
                    idx = *codePtr | (flagByte & 0x01) << 8;
                    Output('/');        // MACROCHR
                    PrintSymEntry(idx);
                    lineLen = 2;
                }
            } else {
                /* SHRTMNM2 - simple, standard mnemonic */
                lineLen = 1;
                OutputTab(kOpTab);
                PrintMnemonic(flagByte);
            }
        }
        

        if (lineLen == 0) {
            /* end of file */
            break;
        }

        OutputFinish();
        //BufPrintf("%d: %s\r\n", lineNum, outBuf);
        BufPrintf("%s\r\n", GetOutBuf());

        codePtr += lineLen-1;
    }

    WMSG3("codePtr=%p endPtr=%p numLines=%d\n", codePtr, endPtr, lineNum-1);
    WMSG1("extra = %d\n", endPtr - codePtr);

    SetResultBuffer(pOutput);
    retval = 0;

bail:
    fSymTab = nil;
    return retval;
}


/*
 * BIGONE
 *
 * Convert one explicit-length line.  "codePtr" points at the byte that
 * follows the length byte, and "len" is the number of bytes remaining in
 * the line.
 *
 * The line is either a whole-line comment, a macro invocation, or an
 * optional label (symbol or local) followed by a mnemonic and operand.
 * Nothing is read beyond "len" bytes; a line that ends before a required
 * byte is flagged with "!BAD LINE!".
 */
void
ReformatLISA3::ProcessLine(const unsigned char* codePtr, int len)
{
    unsigned char mnemonic = 0;

    if (len <= 0) {
        /* the line held only its length byte; *codePtr isn't ours */
        return;
    }

    //printf("{code=0x%02x len=%d}", *codePtr, len);
    if (*codePtr == kCMNTTKN+1 || *codePtr == kCMNTTKN) {
        switch (*codePtr) {
        case kCMNTTKN+1:    Output(';');    break;
        case kCMNTTKN:      Output('*');    break;
        default:
            assert(false);
        }
        // CNVCMNT
        codePtr++;
        /* "> 0" keeps this from running away should len ever be < 1 */
        while (--len > 0)
            Output(*codePtr++ & 0x7f);

        goto bail;
    } else if (*codePtr == kMACTKN || *codePtr == kMACTKN+1) {
        /* CHKMACRO - handle macro */
        if (len < 2) {
            /* symbol index byte is missing */
            Output("!BAD LINE!");
            goto bail;
        }
        unsigned short idx;
        mnemonic = *codePtr;
        idx = (*codePtr & 0x01) << 8;       /* low bit of token = +256 */
        idx |= *++codePtr;
        OutputTab(kOpTab);
        Output('/');        // MACROCHR
        PrintSymEntry(idx);
        codePtr++;
        len -= 2;
        goto ConvtOperand;
    } else if (*codePtr == kLBLTKN || *codePtr == kLBLTKN+1) {
        /* CHKCLBL - handle label at start of line */
        if (len < 2) {
            /* symbol index byte is missing */
            Output("!BAD LINE!");
            goto bail;
        }
        unsigned short idx;
        idx = (*codePtr & 0x01) << 8;       /* low bit of token = +256 */
        idx |= *++codePtr;
        PrintSymEntry(idx);
        codePtr++;
        len -= 2;
        // goto ConvtMnem
    } else if (*codePtr >= kLCLTKN) {
        /* CHKLLBL - handle local label (^) */
        Output('^');
        Output((char) (*codePtr - 0xc0));   /* 0xf0..0xf9 -> '0'..'9' */
        codePtr++;
        len--;
        // goto CNVTMNEM
    } else {
        /* no label; current value is the mnemonic; continue w/o advancing */
        // fall through to CNVTMNEM
    }

    /* CNVTMNEM */
    if (len <= 0) {
        /* the label was the last thing on the line */
        goto bail;
    }
    mnemonic = *codePtr++;
    len--;
    //printf("{mne=0x%02x}", mnemonic);
    if (mnemonic >= kMACTKN) {
        /* CNVRTMAC */
        assert(mnemonic == kMACTKN || mnemonic == kMACTKN+1);
        if (len < 1) {
            /* symbol index byte is missing */
            Output("!BAD LINE!");
            goto bail;
        }
        OutputTab(kOpTab);
        int idx;
        idx = *codePtr++;
        idx |= (mnemonic & 0x01) << 8;
        Output('/');        // MACROCHR
        PrintSymEntry(idx);
        len--;
        //printf("{MAC:%d}", len);
    } else {
        OutputTab(kOpTab);
        PrintMnemonic(mnemonic);
    }

ConvtOperand:
    /* ConvtOperand */
    //printf("{cen=%d}", len);
    ConvertOperand(mnemonic, &codePtr, &len);

bail:
    //if (len > 0)
    //    WMSG1("{LEN=%d}", len);

    return;
}


/*
 * CNVOPRND
 *
 * Convert the operand field (and any trailing comment) of a line.
 *
 * "mnemonic" is the token that preceded the operand.  "*pCodePtr" and
 * "*pLen" describe the unread part of the line; both are updated on
 * return to reflect what was consumed.
 *
 * The body alternates between two states: OutOprnd (expecting an operand
 * or a unary prefix) and OutOprtr (expecting an operator).  The helper
 * PrintNum() reports which state to enter next through its return value.
 */
void
ReformatLISA3::ConvertOperand(unsigned char mnemonic,
    const unsigned char** pCodePtr, int* pLen)
{
    /* operator text, indexed by operator token 0x00-0x0d */
    static const char kOPRTRST1[] = "+-*/&|^=<>%<><";
    /* second character for the two-character operators, else '\0' */
    static const char kOPRTRST2[] = "\0\0\0\0\0\0\0\0\0\0\0==>";

    const unsigned char* codePtr = *pCodePtr;
    int len = *pLen;
    OperandResult result;
    unsigned char adrsMode = 0;
    unsigned char val;

    //printf("{opr len=%d}", len);

    if (mnemonic >= kCMNTTKN) {
        /* OUTCMNT2 */
        /* everything left on the line is treated as comment text */
        PrintComment(adrsMode, codePtr, len);
        goto bail;
    }
    if (mnemonic < kSS) {
        if (mnemonic < kGROUP3 || (mnemonic >= kGROUP5 && mnemonic < kGROUP6)) {
            // address mode is explicit
            adrsMode = *codePtr++;
            len--;
            //printf("{adrs=0x%02x}", adrsMode);
        }
    }
    OutputTab(kAdTab);
    /* these modes open with a paren; PrintComment() emits the closing part */
    if (adrsMode >= 0x10 && adrsMode < 0x80)
        Output('(');

    /* OUTOPRND */
    while (len > 0) {
        val = *codePtr++;
        len--;

        /* single-character prefixes; stay in the operand state afterward */
        if (val == 0x0e) {
            Output('~');
            continue;       // goto OutOprnd
        } else if (val == 0x0f) {
            Output('-');
            continue;       // goto OutOprnd
        } else if (val == 0x3a) {
            Output('#');
            continue;       // goto OutOprnd
        } else if (val == 0x3b) {
            Output('/');
            continue;       // goto OutOprnd
        } else if (val == 0x3d) {
            Output('@');
            continue;       // goto OutOprnd
        } else {
            /* anything else is decoded by PrintNum() */
            result = PrintNum(adrsMode, val, &codePtr, &len);
            if (result == kResultGotoOutOprnd)
                continue;   //goto OutOprnd;
            else if (result == kResultFailed)
                goto bail;
            // else goto OutOprtr
        }

OutOprtr:
        unsigned char opr;

        if (!len)
            break;
        opr = *codePtr++;
        len--;

        if (opr < 0x0e) {
            /* binary operator, printed with a space on either side */
            Output(' ');
            Output(kOPRTRST1[opr]);
            if (kOPRTRST2[opr] != '\0')
                Output(kOPRTRST2[opr]);
            Output(' ');
            // goto OutOprnd
        } else if (opr < 0x20 || opr >= 0x30) {
            // NOOPRTR
            if (opr == kCMNTTKN+1) {
                /* rest of the line is the comment */
                PrintComment(adrsMode, codePtr, len);
                codePtr += len;
                len = 0;
                goto bail;
            }
            /* not an operator: separate with a comma and re-read the byte
               as an operand */
            Output(',');
            codePtr--;      // back up
            len++;
            // goto OutOprnd
        } else {
            /* 0x20-0x2f: '+' followed by the operand token (opr - 0x10) */
            Output('+');
            result = PrintNum(adrsMode, opr - 0x10, &codePtr, &len);
            if (result == kResultGotoOutOprnd)
                continue;
            else if (result == kResultGotoOutOprtr)
                goto OutOprtr;
            else
                goto bail;
        }
    }
    /* finish the address mode; len is expected to be 0 here, so no
       comment text should remain */
    PrintComment(adrsMode, codePtr, len);

bail:
    *pCodePtr = codePtr;
    *pLen = len;
}

/*
 * Emit the eight bits of "val" as '0'/'1' characters, most significant
 * bit first.
 */
void
ReformatLISA3::PrintBin(unsigned char val)
{
    char digits[9];
    char* out = digits;

    /* walk a single-bit mask down from bit 7 to bit 0 */
    for (unsigned int mask = 0x80; mask != 0; mask >>= 1) {
        if ((val & mask) != 0)
            *out++ = '1';
        else
            *out++ = '0';
    }
    *out = '\0';

    Output(digits);
}

/*
 * OUTNUM
 *
 * Convert one operand token.  "val" is the token; any bytes that belong
 * to it are read from "*pCodePtr", and "*pCodePtr" / "*pLen" are updated
 * to reflect what was consumed.
 *
 * Returns kResultGotoOutOprtr when the caller should look for an operator
 * next, kResultGotoOutOprnd when another operand follows directly, or
 * kResultFailed if the token needs more bytes than the line has left.
 */
ReformatLISA3::OperandResult
ReformatLISA3::PrintNum(int adrsMode, unsigned char val,
    const unsigned char** pCodePtr, int* pLen)
{
    const unsigned char* codePtr = *pCodePtr;
    int len = *pLen;
    OperandResult result = kResultUnknown;
    char numBuf[12];
    int need;

    /*
     * Work out how many bytes the token requires before touching any of
     * them, so a truncated line can't make us read past its end.
     */
    need = 0;
    if (val == 0x1a || val == 0x1c || val == 0x1e)
        need = 1;       // 1-byte decimal / hex / binary
    else if (val == 0x1b || val == 0x1d || val == 0x1f)
        need = 2;       // 2-byte decimal / hex / binary
    else if (val >= 0x36 && val <= 0x39)
        need = 1;       // symbol index for #sym and /sym
    else if (val >= 0x60 && val < 0x80 && (val & 0x1f) == 0)
        need = 1;       // explicit string length byte
    else if (val == kLBLTKN || val == kLBLTKN+1)
        need = 1;       // symbol index
    if (len < need) {
        Output("!BAD NUM!");
        result = kResultFailed;
        goto bail;
    }

    // OUTNUM - these all jump to OutOprtr unless otherwise specified
    if (val < 0x1a) {
        Output(val | '0');
    } else if (val == 0x1a) {
        // 1-byte decimal
        sprintf(numBuf, "%u", *codePtr++);
        Output(numBuf);
        len--;
    } else if (val == 0x1b) {
        // 2-byte decimal
        unsigned short num;
        num = *codePtr++;
        num |= *codePtr++ << 8;
        len -= 2;
        sprintf(numBuf, "%u", num);
        Output(numBuf);
    } else if (val == 0x1c) {
        // 1-byte hex
        Output('$');
        sprintf(numBuf, "%02X", *codePtr++);
        Output(numBuf);
        len--;
    } else if (val == 0x1d) {
        // 2-byte hex
        Output('$');
        unsigned short num;
        num = *codePtr++;
        num |= *codePtr++ << 8;
        sprintf(numBuf, "%04X", num);
        Output(numBuf);
        len -= 2;
    } else if (val == 0x1e) {
        // 1-byte binary
        Output('%');
        PrintBin(*codePtr++);
        len--;
    } else if (val == 0x1f) {
        // 2-byte binary, printed in the order the bytes are stored
        Output('%');
        PrintBin(*codePtr++);
        PrintBin(*codePtr++);
        len -= 2;
    } else if (val >= 0x36 && val <= 0x39) {
        // OUTIMD
        if (val == 0x36 || val == 0x37)
            Output('#');
        else
            Output('/');
        int idx;
        idx = (val & 0x01) << 8;        // low bit of token = +256
        idx |= *codePtr++;
        PrintSymEntry(idx);
        len--;
    } else if (val == 0x3c) {
        Output('*');        // loc cntr token
    } else if (val < 0x4a) {
        // <0..<9 tokens
        Output('<');
        Output(val - 0x10);
    } else if (val < 0x50) {
        // ?0..?5 tokens (+0x66)
        Output('?');
        Output(val - 0x1a);
    } else if (val < 0x5a) {
        // >0..>9 tokens
        Output('>');
        Output(val - 0x20);
    } else if (val < 0x60) {
        // ?6..?9 tokens (+0x5c)
        unsigned char newVal = val - 0x24;
        Output('?');
        if (newVal == ';')
            Output('#');
        else
            Output(newVal);
        if (newVal == ':')
            result = kResultGotoOutOprnd;
    } else if (val < 0x80) {
        // String tokens
        int strLen = val & 0x1f;
        if (strLen == 0) {
            // explicit length
            strLen = *codePtr++;
            len--;
        }
        if (strLen > len) {
            Output("!BAD STR!");
            DebugBreak();
            result = kResultFailed;
            goto bail;
        }
        // high bit on the next byte selects the quote style; only peek
        // at it if the line actually has a next byte
        char delim;
        if (len > 0 && *codePtr >= 0x80)
            delim = '"';
        else
            delim = '\'';
        Output(delim);
        while (strLen--) {
            // a delimiter inside the string is written doubled
            if ((*codePtr & 0x7f) == delim)
                Output(delim);
            Output(*codePtr++ & 0x7f);
            len--;
        }
        Output(delim);
    } else if (val == kLBLTKN || val == kLBLTKN+1) {
        int idx;
        idx = (val & 0x01) << 8;        // low bit of token = +256
        idx |= *codePtr++;
        len--;
        PrintSymEntry(idx);
    } else if (val == kCMNTTKN+1) {
        /* OUTCMNT2 */
        PrintComment(adrsMode, codePtr, len);
        codePtr += len;
        len = 0;
    } else {
        // just go to OutOprtr
    }

    if (result == kResultUnknown)
        result = kResultGotoOutOprtr;

bail:
    *pCodePtr = codePtr;
    *pLen = len;
    return result;
}

/*
 * Print symbol table entry "ent".
 *
 * Entries are 8 bytes apart.  The label occupies the first 6 bytes of the
 * entry: eight characters of 6 bits each, packed four to every three
 * bytes.  Output stops at the first 0x20 (space) character.
 */
void
ReformatLISA3::PrintSymEntry(int ent)
{
    if (ent < 0 || ent >= fSymCount) {
        Output("!BAD SYM!");
        WMSG2("invalid entry %d (max %d)\n", ent, fSymCount);
        DebugBreak();
        return;
    }

    const unsigned char* entry = fSymTab + ent * 8;
    unsigned char label[8];

    /* unpack two 3-byte groups into four 6-bit values apiece */
    for (int grp = 0; grp < 2; grp++) {
        unsigned char b0 = entry[grp * 3];
        unsigned char b1 = entry[grp * 3 + 1];
        unsigned char b2 = entry[grp * 3 + 2];
        unsigned char* dst = &label[grp * 4];

        dst[0] = b0 >> 2;
        dst[1] = ((b0 & 0x03) << 4) | (b1 >> 4);
        dst[2] = ((b1 & 0x0f) << 2) | (b2 >> 6);
        dst[3] = b2 & 0x3f;
    }

    for (int pos = 0; pos < 8; pos++) {
        unsigned char ch = label[pos];

        if (ch == 0x20)
            break;          /* space marks the end of the label */

        if (ch >= 0x20)
            Output(ch);
        else
            Output(ch | 0x40);  /* values below 0x20 map up by 0x40 */
    }
}

/*
 * Print the three-character mnemonic for token "val".  The name is taken
 * from gMnemonics3 at offset val*3.
 */
void
ReformatLISA3::PrintMnemonic(unsigned char val)
{
    const char* name = gMnemonics3 + val * 3;

    for (int i = 0; i < 3; i++)
        Output(name[i]);
}

/*
 * OUTCMNT2
 *
 * Close out the operand according to the address mode (index register
 * and/or closing paren), then, if any bytes remain, tab to the comment
 * column and print them after a ';' with the high bit stripped.
 */
void
ReformatLISA3::PrintComment(int adrsMode, const unsigned char* codePtr, int len)
{
    assert(len >= 0);

    switch (adrsMode) {
    case 0x04:  Output(",X");   break;
    case 0x08:  Output(",Y");   break;
    case 0x10:  Output(')');    break;
    case 0x20:  Output(",X)");  break;
    case 0x40:  Output("),Y");  break;
    default:
        /* no suffix for any other mode */
        break;
    }

    if (len <= 0)
        return;

    OutputTab(kComTab);
    Output(';');
    for (int remaining = len; remaining != 0; remaining--) {
        Output(*codePtr & 0x7f);
        codePtr++;
    }
}


/*
 * ===========================================================================
 *      LISA Assembler - v4 and v5
 * ===========================================================================
 */

/*
 * The ProDOS / GS/OS version of LISA uses the INT filetype with the
 * assembler version number in the aux type.  The version is always > $4000.
 *
 * The file format looks like this:
 *  16-byte header
 *  symbol dictionary
 *  <line> ...
 *
 * The way the lines are decoded is fairly involved.  The code here was
 * developed from the LISA/816 v5.0a (433) sources, as found on
 * the A2ROMulan CD-ROM.
 */

/*
 * Report how applicable the LISA v4/v5 reformatter is to this file.
 */
void
ReformatLISA4::Examine(ReformatHolder* pHolder)
{
    /*
     * DOS 3.3 INT files always have a zero aux type, so requiring an aux
     * type of $4000 or more means an INT file on a DOS disk can never be
     * a false positive.
     */
    if (pHolder->GetFileType() != kTypeINT ||
        pHolder->GetAuxType() < 0x4000)
    {
        /* not LISA */
        pHolder->SetApplic(ReformatHolder::kReformatLISA4,
            ReformatHolder::kApplicNot,
            ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
        return;
    }

    /* type and aux type fit; the header check picks "yes" vs. "maybe" */
    if (!ReformatLISA4::IsLISA(pHolder)) {
        /* possibly LISA */
        pHolder->SetApplic(ReformatHolder::kReformatLISA4,
            ReformatHolder::kApplicMaybe,
            ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
        return;
    }

    /* definitely LISA */
    pHolder->SetApplic(ReformatHolder::kReformatLISA4,
        ReformatHolder::kApplicYes,
        ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
}

/*
 * Decide if this is one of ours or perhaps an Integer BASIC or S-C
 * assembler source.
 *
 * Looks only at the header: the symbol table end offset and symbol count
 * must be plausible for the file length, and the three tab stops must be
 * present and smaller than 128.
 *
 * Returns true if the header looks like LISA v4/v5.
 */
/*static*/ bool
ReformatLISA4::IsLISA(const ReformatHolder* pHolder)
{
    const unsigned char* srcPtr = pHolder->GetSourceBuf(ReformatHolder::kPartData);
    long srcLen = pHolder->GetSourceLen(ReformatHolder::kPartData);

    if (pHolder->GetSourceFormat() == ReformatHolder::kSourceFormatDOS)
        return false;       // can only live under ProDOS; need len + aux type

    if (srcLen < kHeaderLen+2)
        return false;       // too short

    /* bytes 0-1 hold the version, which isn't needed for this test */
    unsigned short symEnd;
    unsigned short symCount;

    symEnd = srcPtr[0x02] | srcPtr[0x03] << 8;
    symCount = srcPtr[0x04] | srcPtr[0x05] << 8;

    /*
     * symEnd is an offset from the start of the file, so it can't land
     * inside the header.  Process() refuses such files; don't claim them.
     */
    if (symEnd > srcLen || symEnd < kHeaderLen) {
        WMSG0("  LISA4 bad symEnd\n");
        return false;
    }
    if (symCount > symEnd) {
        WMSG2("  LISA4 funky symCount (count=%d end=%d)\n",
            symCount, symEnd);
        return false;
    }

    unsigned char opTab, adTab, comTab;
    opTab = srcPtr[0x06];
    adTab = srcPtr[0x07];
    comTab = srcPtr[0x08];

    if (opTab < 1 || adTab < 2 || comTab < 3) {
        WMSG0("  LISA4 missing tabs\n");
        return false;
    }
    if (opTab >= 128 || adTab >= 128 || comTab >= 128) {
        WMSG0("  LISA4 huge tabs\n");
        return false;
    }

    return true;
}

/* upper-case hex digits, indexed by nibble value (0-15) */
static const char* gHexDigit = "0123456789ABCDEF";


/*
 * Table of mnemonics, from v5.0a editor sources.
 *
 * Indexed by the one-byte mnemonic token (0x00-0xff).  A NULL entry means
 * no mnemonic is known for that token; the code that uses this table
 * checks for NULL and prints "!BAD MNEMONIC!" instead.
 *
 * Some entries were not present in the editor sources, but were used
 * by sample source code, and have been added here:
 *  0x6c .assume
 *  0x7f .table
 */
static const char* gMnemonics4[] = {
    // 00 - 0f
    "???", "add", "adc", "and", "cmp", "eor", "lda", "ora",
    "sbc", "sta", "sub", "xor", "asl", "dec", "inc", "lsr",
    // 10 - 1f
    "rol", "ror", ".if", "whl", ".go", "bra", "bcc", "bcs",
    "beq", "bfl", "bge", "blt", "bmi", "bne", "bpl", "btr",
    // 20 - 2f
    "bvc", "bvs", "obj", "org", "phs", ".db", "pea", "per",
    "brl", ".md", "far", "fdr", "fzr", "inp", "lcl", "rls",
    // 30 - 3f
    "bit", "cpx", "cpy", "ldx", "ldy", "stx", "sty", "trb",
    "tsb", "stz", "pei", "rep", "sep", "jmp", "jsr", "jml",
    // 40 - 4f
    "jsl", "mvn", "mvp", "= ", "con", "epd", "epz", "eql",
    "equ", "set", ".da", "adr", "byt", "csp", "dby", "hby",
    // 50 - 5f
    "bby", "anx", "chn", "icl", "lib", "lnk", "msg", "psm",
    "rlb", "sbt", "ttl", "dci", "rvs", "str", "zro", "dfs",
    // 60 - 6f
    "hex", "usr", "sav", ".tf", "seg", "cpu", ".entry", ".ref",
    ".group", ".deref", "long", NULL, ".assume", NULL, NULL, NULL,
    // 70 - 7f
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, ".table",
    // 80 - 8f
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    // 90 - 9f
    ".el", ".fi", ".me", ".we", ".la", ".lx", ".sa", ".sx",
    "dph", "if1", "if2", "end", "exp", "gen", "lst", "nls",
    // a0 - af
    "nog", "nox", "pag", "pau", "nlc", "cnd", "asl", "lsr",
    "rol", "ror", "dec", "inc", "mvn", "mvp", "brk", "clc",
    // b0 - bf
    "cld", "cli", "clv", "dex", "dey", "inx", "iny", "nop",
    "pha", "php", "pla", "plp", "rti", "rts", "sec", "sed",
    // c0 - cf
    "sei", "tax", "tay", "tsx", "txa", "txs", "tya", "phx",
    "phy", "plx", "ply", "cop", "phb", "phd", "phk", "plb",
    // d0 - df
    "pld", "rtl", "stp", "swa", "tad", "tas", "tcd", "tcs",
    "tda", "tdc", "tsa", "tsc", "txy", "tyx", "wai", "xba",
    // e0 - ef
    "xce", ".proc", ".endp", ".table", ".endt", NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    // f0 - ff
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
};


/*
 * Parse a file.
 *
 * Reads the header fields (version, symbol table end, symbol count, tab
 * stops, CPU type), builds an index of pointers to the NUL-terminated
 * names in the symbol table, then converts each tokenized source line to
 * text.
 *
 * Neither the symbol scan nor the line walk reads past the region it was
 * given: an unterminated final symbol stops at the end of the table, and
 * a line that would extend past the end of the buffer ends the conversion.
 *
 * Returns 0 on success, -1 if the header or symbol table is unacceptable.
 */
int
ReformatLISA4::Process(const ReformatHolder* pHolder,
    ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
    ReformatOutput* pOutput)
{
    const unsigned char* srcPtr = pHolder->GetSourceBuf(part);
    long srcLen = pHolder->GetSourceLen(part);
    int retval = -1;

    if (srcLen < kHeaderLen+2) {
        WMSG0("  LISA4 too short\n");
        goto bail;
    }

    fUseRTF = false;

    /* declared without initializers so the "goto bail"s above are legal */
    unsigned short version;
    unsigned short symEnd;

    version = srcPtr[0x00] | srcPtr[0x01] << 8;
    symEnd = srcPtr[0x02] | srcPtr[0x03] << 8;
    fSymCount = srcPtr[0x04] | srcPtr[0x05] << 8;
    fOpTab = srcPtr[0x06];
    fAdTab = srcPtr[0x07];
    fComTab = srcPtr[0x08];
    fCpuType = srcPtr[0x09];

    WMSG3("  LISA4 version = 0x%04x  symEnd=%d  symCount=%d\n",
        version, symEnd, fSymCount);
    WMSG4("  LISA4  opTab=%d adTab=%d comTab=%d cpuType=%d\n",
        fOpTab, fAdTab, fComTab, fCpuType);

    if (symEnd > srcLen) {
        WMSG0("  LISA4 bad symEnd\n");
        goto bail;
    }
    if (fSymCount > symEnd) {
        WMSG0("  LISA4 funky symCount\n");
        goto bail;
    }
    if (fSymCount > 0) {
        fSymTab = new const unsigned char*[fSymCount];
        if (fSymTab == nil)
            goto bail;
    }

    const unsigned char* symPtr;
    const unsigned char* endPtr;
    int symIdx;

    symPtr = srcPtr + kHeaderLen;
    endPtr = srcPtr + symEnd;
    if (symPtr > endPtr) {
        /* symEnd points into the header */
        WMSG0("  LISA4 GLITCH: bad symEnd\n");
        goto bail;
    }

    /*
     * Generate symbol table index.
     */
    symIdx = 0;
    while (symPtr < endPtr) {
        if (symIdx < fSymCount)
            fSymTab[symIdx++] = symPtr;
        /* skip to the terminator, but never beyond the table itself */
        while (symPtr < endPtr && *symPtr != '\0')
            symPtr++;
        symPtr++;
    }
    if (symIdx != fSymCount) {
        WMSG2("  LISA4 err: symIdx is %d, symCount is %d\n", symIdx, fSymCount);
        goto bail;
    }

    WMSG3("  LISA4 symPtr=%p endPtr=%p symIdx=%d\n", symPtr, endPtr, symIdx);

    /*
     * Process source lines.
     */
    const unsigned char* codePtr;
    int lineNum;

    codePtr = srcPtr + symEnd;
    endPtr = srcPtr + srcLen;
    /* symEnd <= srcLen was checked above; equality means no source lines */
    assert(codePtr <= endPtr);
    lineNum = 0;

    while (codePtr < endPtr) {
        unsigned char flagByte;
        int lineLen;

        lineNum++;
        OutputStart();

        flagByte = *codePtr++;
        if (flagByte < 0x80) {
            /* explicit length, complex line */
            lineLen = flagByte;
            /* subtract 1 from flagByte, because len includes flagByte */
            if (flagByte > 0) {
                if (flagByte-1 > endPtr - codePtr) {
                    /* declared length exceeds what's left in the file */
                    WMSG0("  LISA4 line runs off end of file\n");
                    break;
                }
                ProcessLine(codePtr, flagByte-1);
            }
        } else {
            /* SpecMnem - locals, labels, comments */
            if (flagByte >= kLocalTKN) {
                lineLen = 1;
                if (flagByte == kComntSemiTKN) {
                    Output(';');
                } else if (flagByte == kCommentTKN) {
                    Output('*');
                } else if (flagByte == kBlankTKN) {
                    // just a blank line
                } else if (flagByte < kLabelTKN) {
                    /* 0xf0 - 0xf9 - local numeric labels, e.g. "^1" */
                    Output('^');
                    Output('0' + flagByte - 0xf0);
                } else if (flagByte < kMacroTKN) {
                    /* 0xfa - 0xfb */
                    if (flagByte == 0xfa) {
                        /* label */
                        if (endPtr - codePtr < 2) {
                            /* 16-bit symbol index is missing */
                            WMSG0("  LISA4 label token at end of file\n");
                            break;
                        }
                        lineLen = 3;
                        int tmp = *codePtr | *(codePtr+1) << 8;
                        PrintSymEntry(tmp);
                    } else {
                        /* not used?? */
                        assert(lineLen == 1);
                        Output("??? ");
                    }
                } else {
                    /* macro (only object on line) */
                    assert(flagByte == kMacroTKN);
                    if (endPtr - codePtr < 2) {
                        /* 16-bit symbol index is missing */
                        WMSG0("  LISA4 macro token at end of file\n");
                        break;
                    }
                    OutputTab(fOpTab);
                    int idx;
                    idx = *codePtr | *(codePtr+1) << 8;
                    Output('_');        // MacroChar
                    PrintSymEntry(idx);
                    lineLen = 3;
                }
            } else {
                /* OutMnem - simple, standard mnemonic */
                lineLen = 1;
                OutputTab(fOpTab);
                if (gMnemonics4[flagByte])
                    Output(gMnemonics4[flagByte]);
                else
                    Output("!BAD MNEMONIC!");
            }
        }

        if (lineLen == 0) {
            /* end of file */
            break;
        }

        OutputFinish();
        //BufPrintf("%d: %s\r\n", lineNum, GetOutBuf());
        BufPrintf("%s\r\n", GetOutBuf());

        /* flagByte itself was already consumed, hence the -1 */
        codePtr += lineLen-1;
    }

    WMSG3("  LISA4 codePtr=%p endPtr=%p numLines=%d\n",
        codePtr, endPtr, lineNum-1);
    WMSG1("  LISA4 extra = %d\n", (int) (endPtr - codePtr));

    SetResultBuffer(pOutput);
    retval = 0;

bail:
    /* the index is only needed while converting; release it every time */
    delete[] fSymTab;
    fSymTab = nil;
    return retval;
}

void
ReformatLISA4::ProcessLine(const unsigned char* codePtr, int len)
{
    unsigned char mnemonic = 0;

    if (*codePtr == kComntSemiTKN || *codePtr == kComntStarTKN ||
        *codePtr == kErrlnTKN)
    {
        switch (*codePtr) {
        case kComntSemiTKN:     Output(';');    break;
        case kComntStarTKN:     Output('*');    break;
        case kErrlnTKN:         Output('!');    break;
        default:
            assert(false);
        }
        codePtr++;
        while (--len)
            Output(*codePtr++ & 0x7f);

        goto bail;
    } else if (*codePtr == kMacroTKN) {
        /* handle macro */
        int idx;
        idx = *++codePtr;
        idx |= *++codePtr << 8;
        OutputTab(fOpTab);
        Output('_');        // MacroChar
        PrintSymEntry(idx);
        codePtr++;
        len -= 3;
        mnemonic = kMacroTKN;
        goto ConvtOperand;
    } else if (*codePtr == kLabelTKN) {
        /* handle label at start of line */
        unsigned short idx;
        idx = *++codePtr;
        idx |= *++codePtr << 8;
        PrintSymEntry(idx);
        codePtr++;
        len -= 3;
        // goto ConvtMnem
    } else if (*codePtr >= kLocalTKN) {
        /* handle local label (^) */
        Output('^');
        Output((char) (*codePtr - 0xc0));
        codePtr++;
        len--;
        // goto ConvtMnem
    } else {
        /* no label; current value is the mnemonic; continue w/o advancing */
        // fall through to ConvtMnem
    }

    /* ConvtMnem */
    mnemonic = *codePtr++;
    len--;
    if (mnemonic >= kMacroTKN) {
        /* OutMacro */
        assert(mnemonic == kMacroTKN);
        OutputTab(fOpTab);
        int idx;
        idx = *codePtr++;
        idx |= *codePtr++ << 8;
        Output('_');        // MacroChar
        PrintSymEntry(idx);
        len -= 2;
        //printf("{MAC:%d}", len);
    } else {
        OutputTab(fOpTab);
        if (gMnemonics4[mnemonic] != NULL)
            Output(gMnemonics4[mnemonic]);
        else {
            Output("!BAD MNEMONIC!");
            WMSG1("  LISA4 bad mnemonic 0x%02x\n", mnemonic);
            DebugBreak();
        }
        if (mnemonic >= kSS) {
            /* CnvMnem2 - mnemonic has no associated operand */
            /* need to fall into ConvertOperand to show comment */
            if (len > 0) {
                /* can only be comment here; skip comment token */
                if (*codePtr != kComntSemiTKN)
                    printf("{SKIP=0x%02x,len=%d}", *codePtr, len);
                codePtr++;
                len--;
            }
        }
    }

ConvtOperand:
    /* ConvtOperand */
    //printf("{cen=%d}", len);
    ConvertOperand(mnemonic, &codePtr, &len);

bail:
    if (len > 0)
        printf("{LEN=%d}", len);

    return;
}

/*
 * ConvtOperand
 */
void
ReformatLISA4::ConvertOperand(unsigned char mnemonic,
    const unsigned char** pCodePtr, int* pLen)
{
    /*
     * Address header char.
     */
    static const char kAdrsModeHeader[] = {
        0, 0, 0, 0, 0, 0, 0, 0, 0,      // 0-8 are null
        '(', '(', '(', '(',             // 9-12
        '[', '[',                       // 13-14
        0                               // 15
    };

    /*
     * operand lookup table - 1st char
     *     0 : not simple operand
     *  b7=1 : 1st char of simple operand
     */
    static const char kOperandTbl1[] =
        "+-*/&|^="          // 0-7
        "<>%<><~-"          // 8-F
        "01234567"          //10-17
        "89\0\0\0\0\0\0"    //18-1F
        "++++++++"          //20-27
        "++\0\0\0\0\0\0"    //28-2F
        "\0<>\0\0\0\0\0"    //30-37
        "\0*@#/^|\\"        //38-3F
        "<<<<<<<<"          //40-47
        "<<??????"          //48-4F
        ">>>>>>>>"          //50-57
        ">>????\0?"         //58-5F
        "\0\0\0\0\0\0\0\0"  //60-67
        "\0\0\0\0\0\0\0\0"  //68-6f
        "\0\0\0\0\0\0\0\0"  //70-77
        "\0\0\0\0\0\0\0\0"  //78-7f
    ;

    /*
     * operand lookup table - 2nd char
     *     0 : only 1 char
     *     1 : was unary op
     *  b7=1 : 2nd char of simple operand
     *
     * (Changed numeric 1 to '!'.  Bit 7 never set.  Normally it's set
     * for anything that isn't numeric 0 or 1.)
     */
    static const char kOperandTbl2[] =
        "\0\0\0\0\0\0\0\0"  // 0-7
        "\0\0\0==>!!"       // 8-F      note: 1's mark unaries
        "\0\0\0\0\0\0\0\0"  //10-17
        "\0\0\0\0\0\0\0\0"  //18-1F
        "01234567"          //20-27
        "89\0\0\0\0\0\0"    //28-2F
        "\0<>\0\0\0\0\0"    //30-37
        "\0\0!!!!!!"        //38-3F     note: 1's mark unaries
        "01234567"          //40-47
        "89012345"          //48-4F
        "01234567"          //50-57
        "896789\0#"         //58-5F
        "\0\0\0\0\0\0\0\0"  //60-67
        "\0\0\0\0\0\0\0\0"  //68-6f
        "\0\0\0\0\0\0\0\0"  //70-77
        "\0\0\0\0\0\0\0\0"  //78-7f
    ;

    /*
     * operator lookup table
     *     0 : not operator
     *     1 : complex operator      
     *  b7=1 : 1st char of simple operator
     *
     * (Changed numeric 1 to '!'.  Bit 7 never set.)
     */
    static const char kOperatorTbl1[] =
        "+-*/&|^="          // 0-7
        "<>%<><\0\0"        // 8-F
        "\0\0\0\0\0\0\0\0"  //10-17
        "\0\0\0\0\0\0\0\0"  //18-1F
        "!!!!!!!!"          //20-27
        "!!!!!!!!"          //28-2F
        "\0<>\0\0\0\0\0"    //30-37
        "\0\0\0\0\0\0\0\0"  //38-3F
        "\0\0\0\0\0\0\0\0"  //40-47
        "\0\0\0\0\0\0\0\0"  //48-4F
        "\0\0\0\0\0\0\0\0"  //50-57
        "\0\0\0\0\0\0\0\0"  //58-5F
        "\0\0\0\0\0\0\0\0"  //60-67
        "\0\0\0\0\0\0\0\0"  //68-6f
        "\0\0\0\0\0\0\0\0"  //70-77
        "\0\0\0\0\0\0\0\0"  //78-7f
    ;
    static const char* kOperatorTbl2 = kOperandTbl2;

    static const char* kAdrsModeTrailer[] = {
        NULL, NULL, NULL, ",X",
        ",X", ",X", NULL, ",S",
        ",Y", "),Y", ",X)", ")",
        ",S),Y", "]", "],Y", NULL,
    };


    const unsigned char* codePtr = *pCodePtr;
    int len = *pLen;
    OperandResult result;
    unsigned char adrsMode = 0;
    unsigned char val;
    char ch;

    if (mnemonic == kMacroTKN || mnemonic < kSS) {
        /* ConvtOperand */
        OutputTab(fAdTab);
        if (mnemonic != kMacroTKN) {
            if (mnemonic < kGROUP3_tkns ||
                !(mnemonic < kGROUP5_tkns || mnemonic == kMVN_tkn ||
                  mnemonic == kMVP_tkn || mnemonic >= kGROUP6_tkns))
            {
                if (len <= 0) {
                    Output("!BAD ADRS!");
                } else {
                    adrsMode = *codePtr++;
                    len--;
                }
            }
            if (adrsMode < NELEM(kAdrsModeHeader)) {
                ch = kAdrsModeHeader[adrsMode];
                if (ch != 0)
                    Output(ch);
            } else {
                Output("!BAD ADRSMODE!");
            }
        }
        //printf("{ven=%d val=0x%02x}", len, *codePtr);

        /* OutOprnd */
        while (len > 0) {
            bool doOutOprtr = false;
            val = *codePtr++;
            len--;

            if (val >= 0x80) {
                if (val == kLabelTKN) {
                    /* OutLabel */
                    int idx;
                    idx = *codePtr++;
                    idx |= *codePtr++ << 8;
                    len -= 2;
                    PrintSymEntry(idx);
                    doOutOprtr = true;
                } else if (val == kComntSemiTKN) {
                    break;      // out of while, to OutOprndDone */
                } else {
                    /* illegal token */
                    Output('!');
                    Output(',');
                    /* keep looping in OutOprnd */
                }
            } else {
                /* OutOpr2 */
                ch = kOperandTbl1[val];
                if (ch != '\0') {
                    /* simple operand */
                    Output(ch);
                    ch = kOperandTbl2[val];
                    if (ch == '!')
                        continue;       // unary, no operator, go to OutOprnd
                    else if (ch != '\0')
                        Output(ch);
                    doOutOprtr = true;
                } else {
                    /* OutOprComp - complex operand */
                    result = PrintComplexOperand(val, &codePtr, &len);
                    if (result == kResultGotoOutOprtr)
                        goto OutOprtr;
                    // else continue around in OutOprnd
                }
            }

            if (doOutOprtr) {
OutOprtr:
                unsigned char opr;

                if (!len)
                    break;
                opr = *codePtr++;
                len--;

                if (opr >= 0x80) {
not_operator:
                    if (opr == kComntSemiTKN)
                        break;  // goto OutOprndDone
                    else {
                        /* must be two sequential operands */
                        Output(',');
                        codePtr--;  // back up
                        len++;
                        // continue around to OutOprnd
                    }
                } else {
                    char opch;

                    opch = kOperatorTbl1[opr];
                    if (opch == 0) {
                        goto not_operator;
                    } else if (opch == 0 || opch == '!') {
                        /* complex */
                        Output('+');
                        opch = kOperatorTbl2[opr];
                        //printf("{opch=0x%02x}", opch);
                        if (opch != '\0') {
                            Output(opch);
                            goto OutOprtr;      // look for another
                        } else {
                            int num;
                            num = opr - 0x10;
                            result = PrintNum(num, &codePtr, &len);
                            if (result == kResultGotoOutOprtr)
                                goto OutOprtr;
                        }
                    } else {
                        /* simple */
                        Output(' ');
                        Output(opch);
                        opch = kOperatorTbl2[opr];
                        if (opch != '\0')
                            Output(opch);
                        Output(' ');
                        // continue to OutOprnd
                    }
                }
            }
        }
    }

    /* OutOprndDone */
    if (adrsMode != 0) {
        if (adrsMode < NELEM(kAdrsModeHeader)) {
            if (kAdrsModeTrailer[adrsMode] != NULL)
                Output(kAdrsModeTrailer[adrsMode]);
        } else {
            Output("!BAD ADRSMODE!");
            printf("{ADRS=%d}", adrsMode);
        }
    }

    if (len > 0) {
        OutputTab(fComTab);
        Output(';');
        while (len--)
            Output(*codePtr++ & 0x7f);
    }

//bail:
    *pCodePtr = codePtr;
    *pLen = len;
}

/*
 * PrintDec (CnvrtDec) - output a little-endian integer in unsigned decimal.
 *
 * Reads "count" bytes starting at *pCodePtr, least-significant byte first,
 * assembles them into one value, and sends its base-10 text to Output().
 * The callers visible in this file pass a count of 1 to 4.
 *
 * On return *pCodePtr has been advanced by "count" bytes and *pLen has been
 * reduced by "count".  The bytes consumed are not checked against *pLen.
 */
void
ReformatLISA4::PrintDec(int count, const unsigned char** pCodePtr,
    int* pLen)
{
    const unsigned char* codePtr = *pCodePtr;
    int len = *pLen;
    unsigned long val = 0;
    char buf[24];       // any 64-bit unsigned long: max 20 digits + nul

    for (int i = 0; i < count; i++) {
        /*
         * Widen to unsigned long before shifting.  Shifting the promoted
         * (signed) int left by 24 overflows for bytes >= 0x80, and where
         * long is 64 bits the sign-extended result would print as a
         * 20-digit number.
         */
        val |= static_cast<unsigned long>(*codePtr++) << (8 * i);
        len--;
    }
    sprintf(buf, "%lu", val);
    Output(buf);

    *pCodePtr = codePtr;
    *pLen = len;
}

/*
 * PrintHex (CnvrtHex) - output a multi-byte value in hexadecimal.
 *
 * Emits '$' followed by two hex digits for each of the "count" bytes at
 * *pCodePtr.  The byte at the highest offset is printed first, the byte
 * at offset zero last.
 *
 * On return *pCodePtr has been advanced by "count" bytes and *pLen has been
 * reduced by "count".
 */
void
ReformatLISA4::PrintHex(int count, const unsigned char** pCodePtr,
    int* pLen)
{
    const unsigned char* const src = *pCodePtr;

    Output('$');

    /* walk backward through the bytes so the last one comes out first */
    int idx = count;
    while (idx-- > 0) {
        const unsigned char byteVal = src[idx];
        Output(gHexDigit[byteVal >> 4]);
        Output(gHexDigit[byteVal & 0x0f]);
    }

    *pCodePtr = src + count;
    *pLen -= count;
}

/*
 * PrintBin (CnvrtBin) - output a multi-byte value in binary.
 *
 * Emits '%' followed by eight '0'/'1' characters for each of the "count"
 * bytes at *pCodePtr.  The byte at the highest offset is printed first,
 * and within each byte the most-significant bit comes first.
 *
 * On return *pCodePtr has been advanced by "count" bytes and *pLen has been
 * reduced by "count".
 */
void
ReformatLISA4::PrintBin(int count, const unsigned char** pCodePtr,
    int* pLen)
{
    const unsigned char* const src = *pCodePtr;
    char bits[9];       // 8 digits + nul

    Output('%');

    for (int idx = count; idx-- > 0; ) {
        const unsigned char byteVal = src[idx];
        char* out = bits;

        /* test each bit from 0x80 down to 0x01 */
        for (int mask = 0x80; mask != 0; mask >>= 1)
            *out++ = (byteVal & mask) ? '1' : '0';
        *out = '\0';

        Output(bits);
    }

    *pCodePtr = src + count;
    *pLen -= count;
}

/*
 * PrintNum (OUTNUM) - output one operand selected by the token "opr".
 *
 * Handles, in order:
 *  - 1/2/3-byte decimal, hex, and binary constants;
 *  - "coerce absolute" / "coerce long": an optional kLabelTKN byte, then
 *    a 16-bit little-endian symbol index, printed as the symbol followed
 *    by ":A" or ":L";
 *  - the macro expression token, printed as "?:";
 *  - anything else at or below kStr31_tkn, treated as a string whose
 *    length is the low five bits of "opr" (or the next byte if those bits
 *    are zero).
 *
 * Returns kResultGotoOutOprnd for the macro expression token,
 * kResultFailed for a string longer than the remaining data, and
 * kResultGotoOutOprtr otherwise.  *pCodePtr and *pLen are updated to
 * reflect the bytes consumed, except on kResultFailed, where they are
 * left untouched.
 */
ReformatLISA4::OperandResult
ReformatLISA4::PrintNum(unsigned char opr, const unsigned char** pCodePtr,
    int* pLen)
{
    const unsigned char* codePtr = *pCodePtr;
    int len = *pLen;
    OperandResult result = kResultUnknown;

    switch (opr) {
    /* fixed-width numeric constants */
    case kDec3_tkn:     PrintDec(3, &codePtr, &len);    break;
    case kDec2_tkn:     PrintDec(2, &codePtr, &len);    break;
    case kDec1_tkn:     PrintDec(1, &codePtr, &len);    break;
    case kHex3_tkn:     PrintHex(3, &codePtr, &len);    break;
    case kHex2_tkn:     PrintHex(2, &codePtr, &len);    break;
    case kHex1_tkn:     PrintHex(1, &codePtr, &len);    break;
    case kBin3_tkn:     PrintBin(3, &codePtr, &len);    break;
    case kBin2_tkn:     PrintBin(2, &codePtr, &len);    break;
    case kBin1_tkn:     PrintBin(1, &codePtr, &len);    break;

    case kcABS_tkn:     /* coerce absolute */
    case kcLONG_tkn:    /* coerce long */
        {
            /* skip the label token if one precedes the index */
            if (*codePtr == kLabelTKN) {
                codePtr++;
                len--;
            }

            /* 16-bit symbol index, low byte first */
            int symIdx = codePtr[0] | (codePtr[1] << 8);
            codePtr += 2;
            len -= 2;

            PrintSymEntry(symIdx);
            Output(':');
            Output(opr == kcABS_tkn ? 'A' : 'L');
        }
        break;

    case kMacE_tkn:
        /* macro expression */
        Output('?');
        Output(':');
        result = kResultGotoOutOprnd;
        break;

    default:
        if (opr > kStr31_tkn) {
            /* CheckMoreOprnd - none currently */
            // (not expected, but not much we can do)
            Output("{CheckMoreOprnd}");
            break;
        }

        /* CheckStrings */
        {
            /* length is in the token itself, or in the following byte */
            unsigned char strLen = opr & 0x1f;
            if (strLen == 0) {
                strLen = *codePtr++;
                len--;
            }

            if (strLen > len) {
                Output("!BAD STR!");
                printf("{opr=0x%02x, strLen=%d, len=%d}", opr, strLen, len);
                return kResultFailed;
            }

            /*
             * The high bit of the first character picks the delimiter:
             * clear means apostrophe (ISAPOST), set means double quote
             * (DETKNSTR).
             */
            const unsigned char delimit = (*codePtr < 0x80) ? '\'' : '\"';

            Output(delimit);
            for ( ; strLen != 0; strLen--) {
                unsigned char val = *codePtr++ & 0x7f;
                len--;

                /* a delimiter inside the string is written twice */
                Output(val);
                if (val == delimit)
                    Output(val);
            }
            Output(delimit);
        }
        break;
    }

    /* every path that didn't pick a result continues with the operator */
    if (result == kResultUnknown)
        result = kResultGotoOutOprtr;

    *pCodePtr = codePtr;
    *pLen = len;
    return result;
}

/*
 * PrintComplexOperand (OutOprComp) - output a "complex" operand.
 *
 * Any token other than kBign_tkn is passed straight to PrintNum().  For
 * kBign_tkn the next byte is a sub-class that selects one of:
 *  - a 4-byte decimal, hex, or binary constant;
 *  - a hex string (length byte, then that many bytes printed as hex
 *    digit pairs), for the HEX pseudo-op;
 *  - an undelimited string (length byte, then that many characters,
 *    printed with the high bit stripped).
 *
 * Returns kResultFailed if a hex string or undelimited string claims to be
 * longer than the remaining data, or if the sub-class is not recognized;
 * a message is written to the output in each case.  Otherwise returns
 * kResultGotoOutOprtr (or whatever PrintNum() returned).
 *
 * *pCodePtr and *pLen are updated in place as bytes are consumed, so on a
 * kBign_tkn failure they reflect whatever was read before the problem was
 * detected.
 */
ReformatLISA4::OperandResult
ReformatLISA4::PrintComplexOperand(unsigned char opr,
    const unsigned char** pCodePtr, int* pLen)
{
    if (opr != kBign_tkn)
        return PrintNum(opr, pCodePtr, pLen);

    unsigned char subClass;

    /* OutOprComp */
    subClass = *(*pCodePtr)++;
    (*pLen)--;
    if (subClass == kBigndec4_tkn) {
        PrintDec(4, pCodePtr, pLen);
    } else if (subClass == kBignhex4_tkn) {
        PrintHex(4, pCodePtr, pLen);
    } else if (subClass == kBignbin4_tkn) {
        PrintBin(4, pCodePtr, pLen);
    } else if (subClass == kBignhexs_tkn) {
        /* hex string, for HEX pseudo-op */
        unsigned char hexLen = *(*pCodePtr)++;
        (*pLen)--;
        if (hexLen > *pLen) {
            Output("!BAD HEX!");
            return kResultFailed;
        }
        while (hexLen--) {
            unsigned char val = *(*pCodePtr)++;
            (*pLen)--;
            Output(gHexDigit[(val & 0xf0) >> 4]);
            Output(gHexDigit[val & 0x0f]);
        }
    } else if (subClass == kBignstring_tkn) {
        /* undelimited string */
        unsigned char strLen = *(*pCodePtr)++;
        (*pLen)--;
        if (strLen > *pLen) {
            Output("!BAD USTR!");
            return kResultFailed;
        }
        while (strLen--) {
            unsigned char val = *(*pCodePtr)++;
            (*pLen)--;
            Output(val & 0x7f);
        }
    } else {
        /*
         * Unknown sub-class.  This is reachable from damaged or
         * misidentified files, so report it like the other bad-data cases
         * rather than raising a breakpoint exception.
         */
        Output("!BAD CPLX OPRND!");
        printf("OPR=%d SUBCLASS=%d", opr, subClass);
        return kResultFailed;
    }

    return kResultGotoOutOprtr;
}

/*
 * PrintSymEntry - output the name held in symbol table slot "ent".
 *
 * An index outside 0..fSymCount-1 produces "!BAD SYM!" instead.
 *
 * The first byte of the entry is skipped; the bytes after it are output
 * up to (not including) the terminating zero.  Bytes below 0x80 have bit
 * 0x20 set before output (presumably to fold letters to lower case --
 * confirm against the LISA symbol format), and the high bit is always
 * stripped.
 */
void
ReformatLISA4::PrintSymEntry(int ent)
{
    if (!(ent >= 0 && ent < fSymCount)) {
        Output("!BAD SYM!");
        return;
    }

    /* start one past the leading byte and run to the zero terminator */
    const unsigned char* cp = fSymTab[ent];
    for (cp++; *cp != 0; cp++) {
        unsigned char ch = *cp;
        if (ch < 0x80)
            ch |= 0x20;
        Output(ch & 0x7f);
    }
}