cc65/src/ca65/scanner.c

/*****************************************************************************/
/*                                                                           */
/*                                 scanner.c                                 */
/*                                                                           */
/*                  The scanner for the ca65 macroassembler                  */
/*                                                                           */
/*                                                                           */
/*                                                                           */
/* (C) 1998-2013, Ullrich von Bassewitz                                      */
/*                Roemerstrasse 52                                           */
/*                D-70794 Filderstadt                                        */
/* EMail:         uz@cc65.org                                                */
/*                                                                           */
/*                                                                           */
/* This software is provided 'as-is', without any expressed or implied       */
/* warranty.  In no event will the authors be held liable for any damages    */
/* arising from the use of this software.                                    */
/*                                                                           */
/* Permission is granted to anyone to use this software for any purpose,     */
/* including commercial applications, and to alter it and redistribute it    */
/* freely, subject to the following restrictions:                            */
/*                                                                           */
/* 1. The origin of this software must not be misrepresented; you must not   */
/*    claim that you wrote the original software. If you use this software   */
/*    in a product, an acknowledgment in the product documentation would be  */
/*    appreciated but is not required.                                       */
/* 2. Altered source versions must be plainly marked as such, and must not   */
/*    be misrepresented as being the original software.                      */
/* 3. This notice may not be removed or altered from any source              */
/*    distribution.                                                          */
/*                                                                           */
/*****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>

/* common */
#include "addrsize.h"
#include "attrib.h"
#include "chartype.h"
#include "check.h"
#include "filestat.h"
#include "fname.h"
#include "xmalloc.h"

/* ca65 */
#include "condasm.h"
#include "error.h"
#include "filetab.h"
#include "global.h"
#include "incpath.h"
#include "instr.h"
#include "istack.h"
#include "listing.h"
#include "macro.h"
#include "toklist.h"
#include "scanner.h"


/*****************************************************************************/
/*                                   Data                                    */
/*****************************************************************************/


/* Current input token including its attributes. The scanner code in this file
** fills in its members (Tok, Pos, WS, SVal, IVal) while reading input.
*/
Token CurTok = STATIC_TOKEN_INITIALIZER;

/* Struct to handle include files. One instance lives inside each CharSource
** that is created by NewInputFile().
*/
typedef struct InputFile InputFile;
struct InputFile {
    FILE*           F;                  /* Input file descriptor */
    FilePos         Pos;                /* Position in file */
    token_t         Tok;                /* Last token */
    int             C;                  /* Last character */
    StrBuf          Line;               /* The current input line */
    int             IncSearchPath;      /* True if we've pushed onto IncSearchPath */
    int             BinSearchPath;      /* True if we've pushed onto BinSearchPath */
    InputFile*      Next;               /* Linked list of input files */
};

/* Struct to handle textual input data. One instance lives inside each
** CharSource that is created by NewInputData().
*/
typedef struct InputData InputData;
struct InputData {
    char*           Text;               /* Pointer to the text data */
    const char*     Pos;                /* Pointer to current position */
    int             Malloced;           /* If true, IDDone() frees Text */
    token_t         Tok;                /* Last token */
    int             C;                  /* Last character */
    InputData*      Next;               /* Linked list of input data */
};

/* Input source: Either file or data. Only the name is introduced here, since
** the function table below needs it; the struct itself follows further down.
*/
typedef struct CharSource CharSource;

/* Set of input functions. Every CharSource carries a pointer to one of these
** tables, and the scanner calls through it instead of checking the kind of
** source itself.
*/
typedef struct CharSourceFunctions CharSourceFunctions;
struct CharSourceFunctions {
    void (*MarkStart) (CharSource*);    /* Mark the start pos of a token */
    void (*NextChar) (CharSource*);     /* Read next char from input */
    void (*Done) (CharSource*);         /* Close input source */
};

/* Input source: Either file or data. Sources form a stack that is linked via
** Next, with the active one on top. Tok and C hold the token and character
** that were current when the source was activated; they are restored when
** the source is closed again.
*/
struct CharSource {
    CharSource*                 Next;   /* Linked list of char sources */
    token_t                     Tok;    /* Token saved on activation */
    int                         C;      /* Character saved on activation */
    const CharSourceFunctions*  Func;   /* Pointer to function table */
    union {
        InputFile               File;   /* File data */
        InputData               Data;   /* Textual data */
    }                           V;
};

/* Current input variables */
static CharSource* Source       = 0;    /* Current char source (top of stack) */
static unsigned    FCount       = 0;    /* Count of currently open input files */
static int         C            = 0;    /* Current input character or EOF */

/* Force end of assembly. If set, NextRawTok() delivers TOK_EOF without
** reading any more input.
*/
int               ForcedEnd     = 0;

/* List of dot keywords with the corresponding tokens. FindDotKeyword() looks
** up entries with bsearch() and compares the keys with strcmp(), so the
** table MUST be kept sorted by Key in strcmp() order. Some tokens can be
** reached via more than one spelling (for example .BYT and .BYTE).
*/
struct DotKeyword {
    const char* Key;                    /* MUST be first field */
    token_t     Tok;
} DotKeywords [] = {
    { ".A16",           TOK_A16                 },
    { ".A8",            TOK_A8                  },
    { ".ADDR",          TOK_ADDR                },
    { ".ADDRSIZE",      TOK_ADDRSIZE            },
    { ".ALIGN",         TOK_ALIGN               },
    { ".AND",           TOK_BOOLAND             },
    { ".ASCIIZ",        TOK_ASCIIZ              },
    { ".ASIZE",         TOK_ASIZE               },
    { ".ASSERT",        TOK_ASSERT              },
    { ".AUTOIMPORT",    TOK_AUTOIMPORT          },
    { ".BANK",          TOK_BANK                },
    { ".BANKBYTE",      TOK_BANKBYTE            },
    { ".BANKBYTES",     TOK_BANKBYTES           },
    { ".BITAND",        TOK_AND                 },
    { ".BITNOT",        TOK_NOT                 },
    { ".BITOR",         TOK_OR                  },
    { ".BITXOR",        TOK_XOR                 },
    { ".BLANK",         TOK_BLANK               },
    { ".BSS",           TOK_BSS                 },
    { ".BYT",           TOK_BYTE                },
    { ".BYTE",          TOK_BYTE                },
    { ".CASE",          TOK_CASE                },
    { ".CHARMAP",       TOK_CHARMAP             },
    { ".CODE",          TOK_CODE                },
    { ".CONCAT",        TOK_CONCAT              },
    { ".CONDES",        TOK_CONDES              },
    { ".CONST",         TOK_CONST               },
    { ".CONSTRUCTOR",   TOK_CONSTRUCTOR         },
    { ".CPU",           TOK_CPU                 },
    { ".DATA",          TOK_DATA                },
    { ".DBG",           TOK_DBG                 },
    { ".DBYT",          TOK_DBYT                },
    { ".DEBUGINFO",     TOK_DEBUGINFO           },
    { ".DEF",           TOK_DEFINED             },
    { ".DEFINE",        TOK_DEFINE              },
    { ".DEFINED",       TOK_DEFINED             },
    { ".DEFINEDMACRO",  TOK_DEFINEDMACRO        },
    { ".DELMAC",        TOK_DELMAC              },
    { ".DELMACRO",      TOK_DELMAC              },
    { ".DESTRUCTOR",    TOK_DESTRUCTOR          },
    { ".DWORD",         TOK_DWORD               },
    { ".ELSE",          TOK_ELSE                },
    { ".ELSEIF",        TOK_ELSEIF              },
    { ".END",           TOK_END                 },
    { ".ENDENUM",       TOK_ENDENUM             },
    { ".ENDIF",         TOK_ENDIF               },
    { ".ENDMAC",        TOK_ENDMACRO            },
    { ".ENDMACRO",      TOK_ENDMACRO            },
    { ".ENDPROC",       TOK_ENDPROC             },
    { ".ENDREP",        TOK_ENDREP              },
    { ".ENDREPEAT",     TOK_ENDREP              },
    { ".ENDSCOPE",      TOK_ENDSCOPE            },
    { ".ENDSTRUCT",     TOK_ENDSTRUCT           },
    { ".ENDUNION",      TOK_ENDUNION            },
    { ".ENUM",          TOK_ENUM                },
    { ".ERROR",         TOK_ERROR               },
    { ".EXITMAC",       TOK_EXITMACRO           },
    { ".EXITMACRO",     TOK_EXITMACRO           },
    { ".EXPORT",        TOK_EXPORT              },
    { ".EXPORTZP",      TOK_EXPORTZP            },
    { ".FARADDR",       TOK_FARADDR             },
    { ".FATAL",         TOK_FATAL               },
    { ".FEATURE",       TOK_FEATURE             },
    { ".FILEOPT",       TOK_FILEOPT             },
    { ".FOPT",          TOK_FILEOPT             },
    { ".FORCEIMPORT",   TOK_FORCEIMPORT         },
    { ".FORCEWORD",     TOK_FORCEWORD           },
    { ".GLOBAL",        TOK_GLOBAL              },
    { ".GLOBALZP",      TOK_GLOBALZP            },
    { ".HIBYTE",        TOK_HIBYTE              },
    { ".HIBYTES",       TOK_HIBYTES             },
    { ".HIWORD",        TOK_HIWORD              },
    { ".I16",           TOK_I16                 },
    { ".I8",            TOK_I8                  },
    { ".IDENT",         TOK_MAKEIDENT           },
    { ".IF",            TOK_IF                  },
    { ".IFBLANK",       TOK_IFBLANK             },
    { ".IFCONST",       TOK_IFCONST             },
    { ".IFDEF",         TOK_IFDEF               },
    { ".IFNBLANK",      TOK_IFNBLANK            },
    { ".IFNCONST",      TOK_IFNCONST            },
    { ".IFNDEF",        TOK_IFNDEF              },
    { ".IFNREF",        TOK_IFNREF              },
    { ".IFP02",         TOK_IFP02               },
    { ".IFP816",        TOK_IFP816              },
    { ".IFPC02",        TOK_IFPC02              },
    { ".IFPSC02",       TOK_IFPSC02             },
    { ".IFREF",         TOK_IFREF               },
    { ".IMPORT",        TOK_IMPORT              },
    { ".IMPORTZP",      TOK_IMPORTZP            },
    { ".INCBIN",        TOK_INCBIN              },
    { ".INCLUDE",       TOK_INCLUDE             },
    { ".INTERRUPTOR",   TOK_INTERRUPTOR         },
    { ".ISIZE",         TOK_ISIZE               },
    { ".ISMNEM",        TOK_ISMNEMONIC          },
    { ".ISMNEMONIC",    TOK_ISMNEMONIC          },
    { ".LEFT",          TOK_LEFT                },
    { ".LINECONT",      TOK_LINECONT            },
    { ".LIST",          TOK_LIST                },
    { ".LISTBYTES",     TOK_LISTBYTES           },
    { ".LOBYTE",        TOK_LOBYTE              },
    { ".LOBYTES",       TOK_LOBYTES             },
    { ".LOCAL",         TOK_LOCAL               },
    { ".LOCALCHAR",     TOK_LOCALCHAR           },
    { ".LOWORD",        TOK_LOWORD              },
    { ".MAC",           TOK_MACRO               },
    { ".MACPACK",       TOK_MACPACK             },
    { ".MACRO",         TOK_MACRO               },
    { ".MATCH",         TOK_MATCH               },
    { ".MAX",           TOK_MAX                 },
    { ".MID",           TOK_MID                 },
    { ".MIN",           TOK_MIN                 },
    { ".MOD",           TOK_MOD                 },
    { ".NOT",           TOK_BOOLNOT             },
    { ".NULL",          TOK_NULL                },
    { ".OR",            TOK_BOOLOR              },
    { ".ORG",           TOK_ORG                 },
    { ".OUT",           TOK_OUT                 },
    { ".P02",           TOK_P02                 },
    { ".P816",          TOK_P816                },
    { ".PAGELEN",       TOK_PAGELENGTH          },
    { ".PAGELENGTH",    TOK_PAGELENGTH          },
    { ".PARAMCOUNT",    TOK_PARAMCOUNT          },
    { ".PC02",          TOK_PC02                },
    { ".POPCPU",        TOK_POPCPU              },
    { ".POPSEG",        TOK_POPSEG              },
    { ".PROC",          TOK_PROC                },
    { ".PSC02",         TOK_PSC02               },
    { ".PUSHCPU",       TOK_PUSHCPU             },
    { ".PUSHSEG",       TOK_PUSHSEG             },
    { ".REF",           TOK_REFERENCED          },
    { ".REFERENCED",    TOK_REFERENCED          },
    { ".RELOC",         TOK_RELOC               },
    { ".REPEAT",        TOK_REPEAT              },
    { ".RES",           TOK_RES                 },
    { ".RIGHT",         TOK_RIGHT               },
    { ".RODATA",        TOK_RODATA              },
    { ".SCOPE",         TOK_SCOPE               },
    { ".SEGMENT",       TOK_SEGMENT             },
    { ".SET",           TOK_SET                 },
    { ".SETCPU",        TOK_SETCPU              },
    { ".SHL",           TOK_SHL                 },
    { ".SHR",           TOK_SHR                 },
    { ".SIZEOF",        TOK_SIZEOF              },
    { ".SMART",         TOK_SMART               },
    { ".SPRINTF",       TOK_SPRINTF             },
    { ".STRAT",         TOK_STRAT               },
    { ".STRING",        TOK_STRING              },
    { ".STRLEN",        TOK_STRLEN              },
    { ".STRUCT",        TOK_STRUCT              },
    { ".TAG",           TOK_TAG                 },
    { ".TCOUNT",        TOK_TCOUNT              },
    { ".TIME",          TOK_TIME                },
    { ".UNDEF",         TOK_UNDEF               },
    { ".UNDEFINE",      TOK_UNDEF               },
    { ".UNION",         TOK_UNION               },
    { ".VERSION",       TOK_VERSION             },
    { ".WARNING",       TOK_WARNING             },
    { ".WORD",          TOK_WORD                },
    { ".XMATCH",        TOK_XMATCH              },
    { ".XOR",           TOK_BOOLXOR             },
    { ".ZEROPAGE",      TOK_ZEROPAGE            },
};


/*****************************************************************************/
/*                            CharSource functions                           */
/*****************************************************************************/


static void UseCharSource (CharSource* S)
/* Push S onto the stack of character sources and make it the active one.
** The token and character that were current before the switch are stored
** in S, so that DoneCharSource() can reinstate them later.
*/
{
    /* Link the new source in front of the one used so far */
    S->Next = Source;

    /* Save the scanner state before the new source overwrites it */
    S->C    = C;
    S->Tok  = CurTok.Tok;

    /* From now on, input comes from S */
    Source = S;

    /* Prime the current character from the new source */
    (*S->Func->NextChar) (S);

    /* Leave a separator as current token, so it will be skipped on the next
    ** call to NextRawTok().
    */
    CurTok.Tok = TOK_SEP;
}


static void DoneCharSource (void)
/* Close the top level character source */
{
    CharSource* S;

    /* First, call the type specific function */
    Source->Func->Done (Source);

    /* Restore the old token */
    CurTok.Tok = Source->Tok;
    C   = Source->C;

    /* Remember the last stacked input source */
    S = Source->Next;

    /* Delete the top level one ... */
    xfree (Source);

    /* ... and use the one before */
    Source = S;
}


/*****************************************************************************/
/*                            InputFile functions                            */
/*****************************************************************************/


static void IFMarkStart (CharSource* S)
/* Use the current position within the input file as the position of the
** token that is about to be read.
*/
{
    CurTok.Pos = S->V.File.Pos;
}


static void IFNextChar (CharSource* S)
/* Fetch the next character of the input file into C. A new line is read
** from the file whenever the buffered one is used up. C is set to EOF if
** the file has no more data.
*/
{
    /* Refill the line buffer for as long as it holds no unread characters */
    while (SB_GetIndex (&S->V.File.Line) >= SB_GetLen (&S->V.File.Line)) {

        unsigned Keep;
        int      Ch;

        /* Collect all characters up to the next newline or the end of file */
        SB_Clear (&S->V.File.Line);
        while ((Ch = fgetc (S->V.File.F)) != EOF && Ch != '\n') {
            SB_AppendChar (&S->V.File.Line, Ch);
        }

        /* A last line without a trailing newline is accepted. If the end of
        ** file was hit without any data, we're done with the file.
        */
        if (Ch == EOF && !SB_NotEmpty (&S->V.File.Line)) {
            /* Add an empty line to the listing. This is a small hack needed
            ** to keep the PC output in sync.
            */
            NewListingLine (&EmptyStrBuf, S->V.File.Pos.Name, FCount);
            C = EOF;
            return;
        }

        /* We have a new input line. To avoid problems with strange line
        ** terminators, remove all whitespace from the end of the line, then
        ** add a single newline.
        */
        for (Keep = SB_GetLen (&S->V.File.Line); Keep > 0; --Keep) {
            if (!IsSpace (SB_AtUnchecked (&S->V.File.Line, Keep-1))) {
                break;
            }
        }
        SB_Drop (&S->V.File.Line, SB_GetLen (&S->V.File.Line) - Keep);
        SB_AppendChar (&S->V.File.Line, '\n');
        SB_Terminate (&S->V.File.Line);

        /* Count the line and pass it on to the listing */
        ++S->V.File.Pos.Line;
        NewListingLine (&S->V.File.Line, S->V.File.Pos.Name, FCount);
    }

    /* The column is the index of the character we're about to deliver */
    S->V.File.Pos.Col = SB_GetIndex (&S->V.File.Line);

    /* Deliver the next character from the line buffer */
    C = SB_Get (&S->V.File.Line);
}


void IFDone (CharSource* S)
/* Release everything that belongs to the input file in S and close it */
{
    /* Remember which search paths were pushed when the file was opened */
    const int PopInc = S->V.File.IncSearchPath;
    const int PopBin = S->V.File.BinSearchPath;

    /* An include file ends here. Check for any .IFs or token lists that are
    ** still open in this file. This restriction is artificial, but
    ** conditionals that start in one file and end in another are uncommon,
    ** and disallowing them helps to find errors.
    */
    CheckOpenIfs ();

    /* Take back the search paths that were added for this file */
    if (PopInc) {
        PopSearchPath (IncSearchPath);
    }
    if (PopBin) {
        PopSearchPath (BinSearchPath);
    }

    /* The line buffer is no longer needed */
    SB_Done (&S->V.File.Line);

    /* Close the file. Errors are ignored on purpose, since the file was only
    ** read from. Then there's one open input file less.
    */
    (void) fclose (S->V.File.F);
    --FCount;
}


/* Set of input file handling functions. The initializers are positional and
** must follow the member order of struct CharSourceFunctions: MarkStart,
** NextChar, Done.
*/
static const CharSourceFunctions IFFunc = {
    IFMarkStart,
    IFNextChar,
    IFDone
};


int NewInputFile (const char* Name)
/* Open a new input file and make it the current character source. The first
** file that is opened is the main file and is opened by the given name; a
** failure to do so is fatal. All other files are include files that are
** searched for in the include search path. Returns true if the file could be
** successfully opened and false otherwise.
*/
{
    const int   IsMain    = (FCount == 0);
    char*       FoundName = 0;          /* Name as delivered by the search */
    FILE*       F;
    struct stat StatBuf;
    StrBuf      NameBuf;                /* No need to initialize */
    StrBuf      Dir = AUTO_STRBUF_INITIALIZER;
    unsigned    FileIdx;
    CharSource* S;


    if (IsMain) {

        /* The main file is opened under the name given */
        F = fopen (Name, "r");
        if (F == 0) {
            Fatal ("Cannot open input file `%s': %s", Name, strerror (errno));
        }

    } else {

        /* Include level: Look for the file in the include directories and
        ** open it if it was found.
        */
        FoundName = SearchFile (IncSearchPath, Name);
        F = (FoundName != 0)? fopen (FoundName, "r") : 0;
        if (F == 0) {
            /* Not found or cannot open, print an error and bail out */
            Error ("Cannot open include file `%s': %s", Name, strerror (errno));
            xfree (FoundName);
            return 0;
        }

        /* Use the path name from now on */
        Name = FoundName;
    }

    /* Stat the file and remember the values. Since fileno() is a
    ** non-standard identifier, fstat cannot be used, so the file is checked
    ** by name. This leaves a race condition: The file may have been deleted
    ** and recreated while it was open. Since mtime and size are only used to
    ** check if a file has changed in the debugger, this is ignored here.
    */
    if (FileStat (Name, &StatBuf) != 0) {
        Fatal ("Cannot stat input file `%s': %s", Name, strerror (errno));
    }

    /* Enter the file into the input file table and keep its index */
    FileIdx = AddFile (SB_InitFromString (&NameBuf, Name),
                       IsMain? FT_MAIN : FT_INCLUDE,
                       StatBuf.st_size, (unsigned long) StatBuf.st_mtime);

    /* Set up a character source for the file, starting before line one */
    S                   = xmalloc (sizeof (*S));
    S->Func             = &IFFunc;
    S->V.File.F         = F;
    S->V.File.Pos.Name  = FileIdx;
    S->V.File.Pos.Line  = 0;
    S->V.File.Pos.Col   = 0;
    SB_Init (&S->V.File.Line);

    /* Push the directory part of the file name onto both search lists, and
    ** remember the results so IFDone() knows what to remove again.
    */
    SB_CopyBuf (&Dir, Name, FindName (Name) - Name);
    SB_Terminate (&Dir);
    S->V.File.IncSearchPath = PushSearchPath (IncSearchPath, SB_GetConstBuf (&Dir));
    S->V.File.BinSearchPath = PushSearchPath (BinSearchPath, SB_GetConstBuf (&Dir));
    SB_Done (&Dir);

    /* One more active input file */
    ++FCount;

    /* Start reading from the new file */
    UseCharSource (S);

    /* Release the name from the search (if any) and report success */
    xfree (FoundName);
    return 1;
}


/*****************************************************************************/
/*                            InputData functions                            */
/*****************************************************************************/


static void IDMarkStart (CharSource* S attribute ((unused)))
/* Token start hook for textual input data. Nothing is recorded for this
** kind of source, so the function is deliberately empty.
*/
{
}


static void IDNextChar (CharSource* S)
/* Fetch the next character of the input text into C. The terminating zero
** of the text is delivered as EOF and is never stepped over.
*/
{
    const char* Cur = S->V.Data.Pos;

    if (*Cur != '\0') {
        /* Deliver the character and advance */
        C = *Cur;
        S->V.Data.Pos = Cur + 1;
    } else {
        /* End of input data, stay where we are */
        C = EOF;
    }
}


void IDDone (CharSource* S)
/* Clean up after a chunk of input data. The text itself is released only if
** it was flagged as malloced when the source was created.
*/
{
    if (!S->V.Data.Malloced) {
        /* The text is not ours to free */
        return;
    }
    xfree (S->V.Data.Text);
}


/* Set of input data handling functions. The initializers are positional and
** must follow the member order of struct CharSourceFunctions: MarkStart,
** NextChar, Done.
*/
static const CharSourceFunctions IDFunc = {
    IDMarkStart,
    IDNextChar,
    IDDone
};


void NewInputData (char* Text, int Malloced)
/* Make the zero terminated Text the current character source. If Malloced is
** true, Text is freed when the source is closed.
*/
{
    /* Allocate the character source ... */
    CharSource* S = xmalloc (sizeof (*S));

    /* ... let it read from the start of the text ... */
    S->V.Data.Malloced  = Malloced;
    S->V.Data.Pos       = Text;
    S->V.Data.Text      = Text;
    S->Func             = &IDFunc;

    /* ... and put it into use */
    UseCharSource (S);
}


/*****************************************************************************/
/*                    Character classification functions                     */
/*****************************************************************************/


int IsIdChar (int C)
/* Return true if C may be part of an identifier. This is the case for
** alphanumeric characters and the underline. '@' and '$' are accepted only
** if AtInIdents or DollarInIdents, respectively, is set.
*/
{
    if (IsAlNum (C) || C == '_') {
        return 1;
    }
    if (C == '@') {
        return AtInIdents != 0;
    }
    if (C == '$') {
        return DollarInIdents != 0;
    }
    return 0;
}


int IsIdStart (int C)
/* Return true if an identifier may begin with C, that is, if C is a letter
** or the underline.
*/
{
    if (IsAlpha (C)) {
        return 1;
    }
    return (C == '_');
}


/*****************************************************************************/
/*                                   Code                                    */
/*****************************************************************************/


static unsigned DigitVal (unsigned char C)
/* Convert a digit into its numerical representation. Decimal digits yield
** 0..9. Any other character is converted to upper case and counted from 'A',
** which stands for 10. The character is not validated here.
*/
{
    return IsDigit (C)? C - '0' : toupper (C) - 'A' + 10;
}


static void NextChar (void)
/* Advance to the next input character by calling the NextChar function of
** the current character source.
*/
{
    (*Source->Func->NextChar) (Source);
}


void LocaseSVal (void)
/* Convert the string attribute of the current token to lower case */
{
    SB_ToLower (&CurTok.SVal);
}


void UpcaseSVal (void)
/* Convert the string attribute of the current token to upper case */
{
    SB_ToUpper (&CurTok.SVal);
}


static int CmpDotKeyword (const void* K1, const void* K2)
/* Compare function for the dot keyword search */
{
    return strcmp (((struct DotKeyword*)K1)->Key, ((struct DotKeyword*)K2)->Key);
}


static token_t FindDotKeyword (void)
/* Find the dot keyword in SVal. Return the corresponding token if found,
** return TOK_NONE if not found.
*/
{
    struct DotKeyword K;
    struct DotKeyword* R;

    /* Initialize K */
    K.Key = SB_GetConstBuf (&CurTok.SVal);
    K.Tok = 0;

    /* If we aren't in ignore case mode, we have to uppercase the keyword */
    if (!IgnoreCase) {
        UpcaseSVal ();
    }

    /* Search for the keyword */
    R = bsearch (&K, DotKeywords, sizeof (DotKeywords) / sizeof (DotKeywords [0]),
                 sizeof (DotKeywords [0]), CmpDotKeyword);
    if (R != 0) {

        /* By default, disable any somewhat experiemental DotKeyword. */

        switch (R->Tok) {

            case TOK_ADDRSIZE:
                /* Disallow .ADDRSIZE function by default */
                if (AddrSize == 0) {
                    return TOK_NONE;
                }
                break;

            default:
                break;
        }

        return R->Tok;

    } else {
        return TOK_NONE;
    }
}


static void ReadIdent (void)
/* Read an identifier from the current input position and append it to SVal.
** Anything SVal already contains is kept. The character in C is taken as
** the first character without any check; all following characters are
** added as long as they are valid identifier characters.
*/
{
    /* The current character is part of the identifier in any case */
    SB_AppendChar (&CurTok.SVal, C);
    NextChar ();

    /* Add the remaining identifier characters */
    while (IsIdChar (C)) {
        SB_AppendChar (&CurTok.SVal, C);
        NextChar ();
    }
    SB_Terminate (&CurTok.SVal);

    /* In ignore case mode, identifiers are kept in upper case */
    if (IgnoreCase) {
        UpcaseSVal ();
    }
}


static void ReadStringConst (int StringTerm)
/* Read a string constant that is delimited by StringTerm into SVal. C must
** hold the opening delimiter on entry. If the line or the input ends before
** the closing delimiter is found, an error is printed and the string read
** so far is used.
*/
{
    /* Step over the opening delimiter */
    NextChar ();

    /* Collect characters until the closing delimiter shows up */
    while (C != StringTerm) {

        /* A string cannot extend beyond the line. Note that the same
        ** message is used if the input ends within the string.
        */
        if (C == '\n' || C == EOF) {
            Error ("Newline in string constant");
            break;
        }

        /* Add the character to the string and go on with the next one */
        SB_AppendChar (&CurTok.SVal, C);
        NextChar ();
    }

    /* Step over the closing delimiter. After an error, this skips the
    ** character that ended the string instead.
    */
    NextChar ();

    /* Zero terminate the string */
    SB_Terminate (&CurTok.SVal);
}


static int Sweet16Reg (const StrBuf* Id)
/* Check if the given identifier is a sweet16 register, that is, the letter
** 'R' (in upper or lower case) followed by nothing but the decimal number
** of a register in the range 0..15. Return -1 if this is not the case,
** return the register number otherwise.
*/
{
    unsigned Len    = SB_GetLen (Id);
    unsigned RegNum = 0;
    unsigned I;

    /* We need the 'R' and at least one digit */
    if (Len < 2) {
        return -1;
    }

    /* The character is converted to unsigned char before it is passed to
    ** toupper(), because the <ctype.h> functions have undefined behavior
    ** for negative values other than EOF.
    */
    if (toupper ((unsigned char) SB_AtUnchecked (Id, 0)) != 'R') {
        return -1;
    }

    /* Everything behind the 'R' must be a decimal digit. The number is
    ** evaluated by hand and checked against the limit after each digit, so
    ** it cannot overflow regardless of the length of the identifier, and
    ** there's no need for the buffer to be zero terminated.
    */
    for (I = 1; I < Len; ++I) {
        char Ch = SB_AtUnchecked (Id, I);
        if (!IsDigit (Ch)) {
            return -1;
        }
        RegNum = RegNum * 10 + (unsigned) (Ch - '0');
        if (RegNum > 15) {
            /* Invalid register */
            return -1;
        }
    }

    /* The register number is valid */
    return (int) RegNum;
}


void NextRawTok (void)
/* Read the next raw token from the input stream and store it in CurTok.
**
** If ForcedEnd is set, the token is TOK_EOF and no input is consumed. If
** InputFromStack() reports that it delivered a token, that token is used
** (after expanding it if it is an identifier naming a define style macro).
** Otherwise the token is scanned character by character starting with the
** lookahead character in C. CurTok.WS records whether blanks preceded the
** token, CurTok.SVal receives identifier and string text, and CurTok.IVal
** receives the value of numbers, character constants, sweet16 registers and
** the signed count of unnamed label references.
**
** Unrecognized input is reported with Error() and skipped; the function then
** continues scanning until it can return a token.
*/
{
    Macro* M;

    /* If we've a forced end of assembly, don't read further */
    if (ForcedEnd) {
        CurTok.Tok = TOK_EOF;
        return;
    }

Restart:
    /* Check if we have tokens from another input source. Presumably
    ** InputFromStack has filled CurTok when it returns true - the token is
    ** inspected right away.
    */
    if (InputFromStack ()) {
        if (CurTok.Tok == TOK_IDENT && (M = FindDefine (&CurTok.SVal)) != 0) {
            /* This is a define style macro - expand it */
            MacExpandStart (M);
            goto Restart;
        }
        return;
    }

Again:
    /* Skip whitespace, remember if we had some */
    if ((CurTok.WS = IsBlank (C)) != 0) {
        do {
            NextChar ();
        } while (IsBlank (C));
    }

    /* Mark the file position of the next token */
    Source->Func->MarkStart (Source);

    /* Clear the string attribute */
    SB_Clear (&CurTok.SVal);

    /* Generate line info for the current token */
    NewAsmLine ();

    /* Hex number or PC symbol? */
    if (C == '$') {
        NextChar ();

        /* Hex digit must follow or DollarIsPC must be enabled */
        if (!IsXDigit (C)) {
            if (DollarIsPC) {
                CurTok.Tok = TOK_PC;
                return;
            } else {
                /* After the error we fall through and return an integer
                ** constant with the value zero.
                */
                Error ("Hexadecimal digit expected");
            }
        }

        /* Read the number */
        CurTok.IVal = 0;
        while (1) {
            if (UnderlineInNumbers && C == '_') {
                while (C == '_') {
                    NextChar ();
                }
                if (!IsXDigit (C)) {
                    Error ("Number may not end with underline");
                }
            }
            if (IsXDigit (C)) {
                /* If the top nibble is in use, the shift would lose bits */
                if (CurTok.IVal & 0xF0000000) {
                    Error ("Overflow in hexadecimal number");
                    CurTok.IVal = 0;
                }
                CurTok.IVal = (CurTok.IVal << 4) + DigitVal (C);
                NextChar ();
            } else {
                break;
            }
        }

        /* This is an integer constant */
        CurTok.Tok = TOK_INTCON;
        return;
    }

    /* Binary number? */
    if (C == '%') {
        NextChar ();

        /* 0 or 1 must follow. After the error we go on and return an integer
        ** constant with the value zero.
        */
        if (!IsBDigit (C)) {
            Error ("Binary digit expected");
        }

        /* Read the number */
        CurTok.IVal = 0;
        while (1) {
            if (UnderlineInNumbers && C == '_') {
                while (C == '_') {
                    NextChar ();
                }
                if (!IsBDigit (C)) {
                    Error ("Number may not end with underline");
                }
            }
            if (IsBDigit (C)) {
                /* If the top bit is in use, the shift would lose it */
                if (CurTok.IVal & 0x80000000) {
                    Error ("Overflow in binary number");
                    CurTok.IVal = 0;
                }
                CurTok.IVal = (CurTok.IVal << 1) + DigitVal (C);
                NextChar ();
            } else {
                break;
            }
        }

        /* This is an integer constant */
        CurTok.Tok = TOK_INTCON;
        return;
    }

    /* Number? */
    if (IsDigit (C)) {

        char Buf[16];
        unsigned Digits;
        unsigned Base;
        unsigned I;
        long     Max;
        unsigned DVal;

        /* Ignore leading zeros */
        while (C == '0') {
            NextChar ();
        }

        /* Read the number into Buf counting the digits. Hex digits are
        ** accepted here because the base is only known once we have seen
        ** whether a 'h' suffix follows.
        */
        Digits = 0;
        while (1) {
            if (UnderlineInNumbers && C == '_') {
                while (C == '_') {
                    NextChar ();
                }
                if (!IsXDigit (C)) {
                    Error ("Number may not end with underline");
                }
            }
            if (IsXDigit (C)) {
                /* Buf is big enough to allow any decimal and hex number to
                ** overflow, so ignore excess digits here, they will be detected
                ** when we convert the value.
                */
                if (Digits < sizeof (Buf)) {
                    Buf[Digits++] = C;
                }
                NextChar ();
            } else {
                break;
            }
        }

        /* Allow zilog/intel style hex numbers with a 'h' suffix. Max is the
        ** largest intermediate value that may still be multiplied by Base.
        */
        if (C == 'h' || C == 'H') {
            NextChar ();
            Base = 16;
            Max  = 0xFFFFFFFFUL / 16;
        } else {
            Base = 10;
            Max  = 0xFFFFFFFFUL / 10;
        }

        /* Convert the number using the given base.
        ** NOTE(review): the range test runs before the multiplication, so a
        ** decimal number may end up as large as Max*10+9, which is slightly
        ** above 0xFFFFFFFF, without an error being flagged here - confirm
        ** that this is caught by later range checks.
        */
        CurTok.IVal = 0;
        for (I = 0; I < Digits; ++I) {
            if (CurTok.IVal > Max) {
                Error ("Number out of range");
                CurTok.IVal = 0;
                break;
            }
            DVal = DigitVal (Buf[I]);
            if (DVal >= Base) {
                /* Something like a hex digit in a decimal number */
                Error ("Invalid digits in number");
                CurTok.IVal = 0;
                break;
            }
            CurTok.IVal = (CurTok.IVal * Base) + DVal;
        }

        /* This is an integer constant */
        CurTok.Tok = TOK_INTCON;
        return;
    }

    /* Control command? */
    if (C == '.') {

        /* Remember and skip the dot */
        NextChar ();

        /* Check if it's just a dot */
        if (!IsIdStart (C)) {

            /* Just a dot */
            CurTok.Tok = TOK_DOT;

        } else {

            /* Read the remainder of the identifier */
            SB_AppendChar (&CurTok.SVal, '.');
            ReadIdent ();

            /* Dot keyword, search for it */
            CurTok.Tok = FindDotKeyword ();
            if (CurTok.Tok == TOK_NONE) {

                /* Not found */
                if (!LeadingDotInIdents) {
                    /* Invalid pseudo instruction. Drop it and scan the next
                    ** token.
                    */
                    Error ("`%m%p' is not a recognized control command", &CurTok.SVal);
                    goto Again;
                }

                /* An identifier with a dot. Check if it's a define style
                ** macro.
                */
                if ((M = FindDefine (&CurTok.SVal)) != 0) {
                    /* This is a define style macro - expand it */
                    MacExpandStart (M);
                    goto Restart;
                }

                /* Just an identifier with a dot */
                CurTok.Tok = TOK_IDENT;
            }

        }
        return;
    }

    /* Indirect op for sweet16 cpu. Must check this before checking for local
    ** symbols, because these may also use the '@' symbol.
    */
    if (CPU == CPU_SWEET16 && C == '@') {
        NextChar ();
        CurTok.Tok = TOK_AT;
        return;
    }

    /* Local symbol? */
    if (C == LocalStart) {

        /* Read the identifier. */
        ReadIdent ();

        /* Start character alone is not enough */
        if (SB_GetLen (&CurTok.SVal) == 1) {
            Error ("Invalid cheap local symbol");
            goto Again;
        }

        /* A local identifier */
        CurTok.Tok = TOK_LOCAL_IDENT;
        return;
    }


    /* Identifier or keyword? */
    if (IsIdStart (C)) {

        /* Read the identifier */
        ReadIdent ();

        /* Check for special names. Bail out if we have identified the type of
        ** the token. Go on if the token is an identifier.
        */
        if (SB_GetLen (&CurTok.SVal) == 1) {
            switch (toupper (SB_AtUnchecked (&CurTok.SVal, 0))) {

                case 'A':
                    /* "a:" is an address size override, a lone "a" is the
                    ** register.
                    */
                    if (C == ':') {
                        NextChar ();
                        CurTok.Tok = TOK_OVERRIDE_ABS;
                    } else {
                        CurTok.Tok = TOK_A;
                    }
                    return;

                case 'F':
                    /* Only "f:" is special, a lone "f" is an identifier */
                    if (C == ':') {
                        NextChar ();
                        CurTok.Tok = TOK_OVERRIDE_FAR;
                        return;
                    }
                    break;

                case 'S':
                    /* A register only for the 65816 */
                    if (CPU == CPU_65816) {
                        CurTok.Tok = TOK_S;
                        return;
                    }
                    break;

                case 'X':
                    CurTok.Tok = TOK_X;
                    return;

                case 'Y':
                    CurTok.Tok = TOK_Y;
                    return;

                case 'Z':
                    /* Only "z:" is special, a lone "z" is an identifier */
                    if (C == ':') {
                        NextChar ();
                        CurTok.Tok = TOK_OVERRIDE_ZP;
                        return;
                    }
                    break;

                default:
                    break;
            }

        } else if (CPU == CPU_SWEET16 &&
                  (CurTok.IVal = Sweet16Reg (&CurTok.SVal)) >= 0) {

            /* A sweet16 register number in sweet16 mode */
            CurTok.Tok = TOK_REG;
            return;

        }

        /* Check for define style macro */
        if ((M = FindDefine (&CurTok.SVal)) != 0) {
            /* Macro - expand it */
            MacExpandStart (M);
            goto Restart;
        } else {
            /* An identifier */
            CurTok.Tok = TOK_IDENT;
        }
        return;
    }

    /* Ok, let's do the switch */
CharAgain:
    switch (C) {

        case '+':
            NextChar ();
            CurTok.Tok = TOK_PLUS;
            return;

        case '-':
            NextChar ();
            CurTok.Tok = TOK_MINUS;
            return;

        case '/':
            NextChar ();
            if (C == '*' && CComments) {
                /* Remember the position, then skip the '*' */
                Collection LineInfos = STATIC_COLLECTION_INITIALIZER;
                GetFullLineInfo (&LineInfos);
                NextChar ();
                do {
                    while (C !=  '*') {
                        if (C == EOF) {
                            /* Report the error for the position where the
                            ** comment started, then handle the EOF.
                            */
                            LIError (&LineInfos, "Unterminated comment");
                            ReleaseFullLineInfo (&LineInfos);
                            DoneCollection (&LineInfos);
                            goto CharAgain;
                        }
                        NextChar ();
                    }
                    NextChar ();
                } while (C != '/');
                NextChar ();
                ReleaseFullLineInfo (&LineInfos);
                DoneCollection (&LineInfos);
                goto Again;
            }
            /* A division operator. This includes a '/' that is followed by
            ** a '*' while C comments are disabled: the slash must still
            ** yield a token of its own, otherwise the caller would see the
            ** previous token kind once more. The '*' is left in C and is
            ** scanned by the next call.
            */
            CurTok.Tok = TOK_DIV;
            return;

        case '*':
            NextChar ();
            CurTok.Tok = TOK_MUL;
            return;

        case '^':
            NextChar ();
            CurTok.Tok = TOK_XOR;
            return;

        case '&':
            NextChar ();
            if (C == '&') {
                NextChar ();
                CurTok.Tok = TOK_BOOLAND;
            } else {
                CurTok.Tok = TOK_AND;
            }
            return;

        case '|':
            NextChar ();
            if (C == '|') {
                NextChar ();
                CurTok.Tok = TOK_BOOLOR;
            } else {
                CurTok.Tok = TOK_OR;
            }
            return;

        case ':':
            NextChar ();
            switch (C) {

                case ':':
                    NextChar ();
                    CurTok.Tok = TOK_NAMESPACE;
                    break;

                case '-':
                    /* Unnamed label reference, IVal is minus the number of
                    ** '-' characters.
                    */
                    CurTok.IVal = 0;
                    do {
                        --CurTok.IVal;
                        NextChar ();
                    } while (C == '-');
                    CurTok.Tok = TOK_ULABEL;
                    break;

                case '+':
                    /* Unnamed label reference, IVal is the number of '+'
                    ** characters.
                    */
                    CurTok.IVal = 0;
                    do {
                        ++CurTok.IVal;
                        NextChar ();
                    } while (C == '+');
                    CurTok.Tok = TOK_ULABEL;
                    break;

                case '=':
                    NextChar ();
                    CurTok.Tok = TOK_ASSIGN;
                    break;

                default:
                    CurTok.Tok = TOK_COLON;
                    break;
            }
            return;

        case ',':
            NextChar ();
            CurTok.Tok = TOK_COMMA;
            return;

        case ';':
            /* Comment: skip the rest of the line, then handle the newline or
            ** EOF that ended it.
            */
            NextChar ();
            while (C != '\n' && C != EOF) {
                NextChar ();
            }
            goto CharAgain;

        case '#':
            NextChar ();
            CurTok.Tok = TOK_HASH;
            return;

        case '(':
            NextChar ();
            CurTok.Tok = TOK_LPAREN;
            return;

        case ')':
            NextChar ();
            CurTok.Tok = TOK_RPAREN;
            return;

        case '[':
            NextChar ();
            CurTok.Tok = TOK_LBRACK;
            return;

        case ']':
            NextChar ();
            CurTok.Tok = TOK_RBRACK;
            return;

        case '{':
            NextChar ();
            CurTok.Tok = TOK_LCURLY;
            return;

        case '}':
            NextChar ();
            CurTok.Tok = TOK_RCURLY;
            return;

        case '<':
            NextChar ();
            if (C == '=') {
                NextChar ();
                CurTok.Tok = TOK_LE;
            } else if (C == '<') {
                NextChar ();
                CurTok.Tok = TOK_SHL;
            } else if (C == '>') {
                NextChar ();
                CurTok.Tok = TOK_NE;
            } else {
                CurTok.Tok = TOK_LT;
            }
            return;

        case '=':
            NextChar ();
            CurTok.Tok = TOK_EQ;
            return;

        case '!':
            NextChar ();
            CurTok.Tok = TOK_BOOLNOT;
            return;

        case '>':
            NextChar ();
            if (C == '=') {
                NextChar ();
                CurTok.Tok = TOK_GE;
            } else if (C == '>') {
                NextChar ();
                CurTok.Tok = TOK_SHR;
            } else {
                CurTok.Tok = TOK_GT;
            }
            return;

        case '~':
            NextChar ();
            CurTok.Tok = TOK_NOT;
            return;

        case '\'':
            /* Hack: If we allow ' as terminating character for strings, read
            ** the following stuff as a string, and check for a one character
            ** string later.
            */
            if (LooseStringTerm) {
                ReadStringConst ('\'');
                if (SB_GetLen (&CurTok.SVal) == 1) {
                    CurTok.IVal = SB_AtUnchecked (&CurTok.SVal, 0);
                    CurTok.Tok = TOK_CHARCON;
                } else {
                    CurTok.Tok = TOK_STRCON;
                }
            } else {
                /* Always a character constant */
                NextChar ();
                if (C == EOF || IsControl (C)) {
                    Error ("Illegal character constant");
                    goto CharAgain;
                }
                CurTok.IVal = C;
                CurTok.Tok = TOK_CHARCON;
                NextChar ();
                if (C != '\'') {
                    /* The closing quote is missing, which is tolerated if
                    ** MissingCharTerm is set.
                    */
                    if (!MissingCharTerm) {
                        Error ("Illegal character constant");
                    }
                } else {
                    NextChar ();
                }
            }
            return;

        case '\"':
            ReadStringConst ('\"');
            CurTok.Tok = TOK_STRCON;
            return;

        case '\\':
            /* Line continuation? */
            if (LineCont) {
                NextChar ();
                /* Next char should be a LF, if not, will result in an error later */
                if (C == '\n') {
                    /* Ignore the '\n' */
                    NextChar ();
                    goto Again;
                } else {
                    /* Make it clear what the problem is: */
                    Error ("EOL expected.");
                }
            }
            /* Leave the switch, the current character is flagged as invalid
            ** input below.
            */
            break;

        case '\n':
            NextChar ();
            CurTok.Tok = TOK_SEP;
            return;

        case EOF:
            CheckInputStack ();
            /* In case of the main file, do not close it, but return EOF. */
            if (Source && Source->Next) {
                DoneCharSource ();
                goto Again;
            } else {
                CurTok.Tok = TOK_EOF;
            }
            return;
    }

    /* If we go here, we could not identify the current character. Skip it
    ** and try again.
    */
    Error ("Invalid input character: 0x%02X", C & 0xFF);
    NextChar ();
    goto Again;
}


int GetSubKey (const char** Keys, unsigned Count)
/* Look up the current token in a table of Count keywords. The current token
** must be an identifier, and the entries in Keys must be upper case. Unless
** the scanner already runs in ignore case mode, the identifier in CurTok.SVal
** is converted to upper case as a side effect. Return the index of the
** matching entry, or -1 if there is none.
*/
{
    unsigned Idx = 0;

    /* Anything but an identifier is a caller error */
    PRECONDITION (CurTok.Tok == TOK_IDENT);

    /* In ignore case mode the identifier is upper case already, otherwise
    ** convert it now so that it can be compared with the keys.
    */
    if (!IgnoreCase) {
        UpcaseSVal ();
    }

    /* Walk the table front to back, it is small enough for that */
    while (Idx < Count) {
        if (SB_CompareStr (&CurTok.SVal, Keys [Idx]) == 0) {
            /* This is the one */
            return Idx;
        }
        ++Idx;
    }

    /* No entry matches */
    return -1;
}


unsigned char ParseAddrSize (void)
/* Interpret the current token as an address size specifier. If it is an
** identifier that AddrSizeFromStr recognizes, return the matching address
** size constant. Otherwise output an error message and return
** ADDR_SIZE_DEFAULT. The token itself is not consumed.
*/
{
    /* Only identifiers can name an address size */
    if (CurTok.Tok == TOK_IDENT) {
        unsigned char Size = AddrSizeFromStr (SB_GetConstBuf (&CurTok.SVal));
        if (Size != ADDR_SIZE_INVALID) {
            /* Done */
            return Size;
        }
    }

    /* Either not an identifier, or not a known address size name */
    Error ("Address size specifier expected");
    return ADDR_SIZE_DEFAULT;
}


void InitScanner (const char* InFile)
/* Initialize the scanner, open the given input file. InFile is the name of
** the file and is passed on unchanged to NewInputFile, which does all of the
** work.
*/
{
    /* Open the input file */
    NewInputFile (InFile);
}


void DoneScanner (void)
/* Release scanner resources. This is a single call to DoneCharSource. */
{
    /* NOTE(review): presumably this closes the one character source that is
    ** still open at the end of the assembly - confirm against DoneCharSource.
    */
    DoneCharSource ();
}