// acme/src/input.c
//
// 665 lines
// 19 KiB
// C

// ACME - a crossassembler for producing 6502/65c02/65816/65ce02 code.
// Copyright (C) 1998-2017 Marco Baye
// Have a look at "acme.c" for further info
//
// Input stuff
// 19 Nov 2014 Merged Johann Klasek's report listing generator patch
// 9 Jan 2018 Allowed "//" comments
#include "input.h"
#include "config.h"
#include "alu.h"
#include "dynabuf.h"
#include "global.h" // FIXME - remove when no longer needed
#include "platform.h"
#include "section.h"
#include "symbol.h"
#include "tree.h"
// Constants
// mode string passed to fopen() when opening input files for reading
const char FILE_READBINARY[] = "rb";
#define CHAR_TAB (9) // Tab character
#define CHAR_LF (10) // line feed (in file)
// NOTE: in high-level format, the same value (10) means "start of line"
#define CHAR_CR (13) // carriage return (in file)
// NOTE: in high-level format, the same value (13) means "end of file"
#define CHAR_STATEMENT_DELIMITER ':' // separates several statements on one line
#define CHAR_COMMENT_SEPARATOR ';' // rest of line after this is a comment
// if the characters above are changed, don't forget to adjust ByteFlags[]!
// fake input structure (for error msgs before any real input is established)
// The initializers are positional; struct input itself is declared elsewhere,
// so the per-line comments are the only hint which member each value sets.
static struct input outermost = {
"<none>", // file name
0, // line number
FALSE, // Faked file access, so no RAM read (presumably "source_is_ram" - confirm against struct)
INPUTSTATE_EOF, // state of input
{
NULL // RAM read pointer or file handle
}
};
// variables
// Initially points at the fake structure above.
struct input *Input_now = &outermost; // current input structure
// functions
// let current input point to start of file
void Input_new_file(const char *filename, FILE *fd)
{
Input_now->original_filename = filename;
Input_now->line_number = 1;
Input_now->source_is_ram = FALSE;
Input_now->state = INPUTSTATE_NORMAL;
Input_now->src.fd = fd;
}
// remember source code character for report generator
#define HEXBUFSIZE	9	// actually, 4+1 is enough, but for systems without snprintf(), let's be extra-safe.
// pass a source character on to the report generator, but only if report->fd is set
#define IF_WANTED_REPORT_SRCCHAR(c)	do { if (report->fd) report_srcchar(c); } while(0)
// Collect the characters of the current source line and, once a line is
// complete, write one report line for it.
// - If the current input differs from the one seen last, a "Source:" header
//   is written and the collected source text is discarded.
// - If the previous character was '\n', a report line is written: line number,
//   start address of the output bytes (if there are any), hex dump of those
//   bytes, and the collected source text. Both buffers are reset afterwards.
// - Finally "new_char" is collected, unless it is '\n' or '\r' or the buffer
//   is already full (surplus characters are dropped).
static void report_srcchar(char new_char)
{
	static char	prev_char	= '\0';	// character passed in by the previous call
	int		ii;
	char		hex_address[HEXBUFSIZE];
	char		hexdump[2 * REPORT_BINBUFSIZE + 2];	// +2 for '.' and terminator

	// if input has changed, insert explanation
	if (Input_now != report->last_input) {
		fprintf(report->fd, "\n; ******** Source: %s\n", Input_now->original_filename);
		report->last_input = Input_now;
		report->asc_used = 0;	// clear buffer
		prev_char = '\0';
	}
	if (prev_char == '\n') {
		// line start after line break detected and EOS processed,
		// build report line:
		// show line number...
		// (field plus separator are 8 columns wide, so that together
		// with the 5 + 19 columns written below the prefix is a
		// multiple of 8 and tabs in the source text stay aligned)
		fprintf(report->fd, "%6d  ", Input_now->line_number - 1);
		// prepare outbytes' start address
		if (report->bin_used) {
#if _BSD_SOURCE || _XOPEN_SOURCE >= 500 || _ISOC99_SOURCE || _POSIX_C_SOURCE >= 200112L
			snprintf(hex_address, HEXBUFSIZE, "%04x", report->bin_address);
#else
			sprintf(hex_address, "%04x", report->bin_address);
#endif
		} else {
			hex_address[0] = '\0';
		}
		// prepare outbytes
		hexdump[0] = '\0';
		for (ii = 0; ii < report->bin_used; ++ii)
			sprintf(hexdump + 2 * ii, "%02x", (unsigned int) (unsigned char) (report->bin_buf[ii]));
		// if binary buffer is full, overwrite last byte with "..."
		if (report->bin_used == REPORT_BINBUFSIZE)
			sprintf(hexdump + 2 * (REPORT_BINBUFSIZE - 1), "...");
		// show address and bytes
		fprintf(report->fd, "%-4s %-19s", hex_address, hexdump);
		// at this point the output should be a multiple of 8 characters
		// so far to preserve tabs of the source...
		// make room for the terminator if the text buffer is full
		if (report->asc_used == REPORT_ASCBUFSIZE)
			--report->asc_used;
		report->asc_buf[report->asc_used] = '\0';
		fprintf(report->fd, "%s\n", report->asc_buf);	// show source line
		report->asc_used = 0;	// reset buffers
		report->bin_used = 0;
	}
	if (new_char != '\n' && new_char != '\r') {	// detect line break
		if (report->asc_used < REPORT_ASCBUFSIZE)
			report->asc_buf[report->asc_used++] = new_char;
	}
	prev_char = new_char;
}
// Deliver source code from current file (!) in shortened high-level format
// One byte is returned per call. The conversions done here:
// - a run of TABs/spaces is delivered as a single space
// - LF, CR and CR+LF are delivered as CHAR_EOS followed by CHAR_SOL
// - ';' and "//" comments are dropped (CHAR_EOS is delivered instead)
// - ':' is delivered as CHAR_EOS
// - CHAR_EOB is delivered as CHAR_EOS followed by CHAR_EOB
// - end of file is delivered as CHAR_EOS followed by CHAR_EOF
// Whatever has to follow on the next call is remembered in Input_now->state.
static char get_processed_from_file(void)
{
// static, so that INPUTSTATE_AGAIN can re-process a byte fetched by an earlier call
static int from_file = 0;
for (;;) {
switch (Input_now->state) {
case INPUTSTATE_NORMAL:
// fetch a fresh byte from the current source file
from_file = getc(Input_now->src.fd);
// NOTE(review): this also hands EOF (an int) to the report generator,
// which takes a char - confirm this is intended
IF_WANTED_REPORT_SRCCHAR(from_file);
// now process it
/*FALLTHROUGH*/
case INPUTSTATE_AGAIN:
// Process the latest byte again. Of course, this only
// makes sense if the loop has executed at least once,
// otherwise the contents of from_file are undefined.
// If the source is changed so there is a possibility
// to enter INPUTSTATE_AGAIN mode without first having
// defined "from_file", trouble may arise...
Input_now->state = INPUTSTATE_NORMAL;
// EOF must be checked first because it cannot be used
// as an index into Byte_flags[]
if (from_file == EOF) {
// remember to send an end-of-file
Input_now->state = INPUTSTATE_EOF;
return CHAR_EOS; // end of statement
}
// check whether character is special one
// if not, everything's cool and froody, so return it
if ((BYTEFLAGS(from_file) & BYTEIS_SYNTAX) == 0)
return (char) from_file;
// check special characters ("0x00 TAB LF CR SPC / : ; }")
// (every branch of this inner switch returns, so control never
// drops into the INPUTSTATE_SKIPBLANKS case below)
switch (from_file) {
case CHAR_TAB: // TAB character
case ' ':
// remember to skip all following blanks
Input_now->state = INPUTSTATE_SKIPBLANKS;
return ' ';
case CHAR_LF: // LF character
// remember to send a start-of-line
Input_now->state = INPUTSTATE_LF;
return CHAR_EOS; // end of statement
case CHAR_CR: // CR character
// remember to check CRLF + send start-of-line
Input_now->state = INPUTSTATE_CR;
return CHAR_EOS; // end of statement
case CHAR_EOB:
// remember to send an end-of-block
Input_now->state = INPUTSTATE_EOB;
return CHAR_EOS; // end of statement
case '/':
// to check for "//", get another byte:
from_file = getc(Input_now->src.fd);
IF_WANTED_REPORT_SRCCHAR(from_file);
if (from_file != '/') {
// not "//", so:
Input_now->state = INPUTSTATE_AGAIN; // second byte must be parsed normally later on
return '/'; // first byte is returned normally right now
}
// it's really "//", so act as if ';'
/*FALLTHROUGH*/
case CHAR_COMMENT_SEPARATOR:
// remember to skip remainder of line
Input_now->state = INPUTSTATE_COMMENT;
return CHAR_EOS; // end of statement
case CHAR_STATEMENT_DELIMITER:
// just deliver an EOS instead
return CHAR_EOS; // end of statement
default:
// complain if byte is 0
// (the byte is still delivered to the caller after the error)
Throw_error("Source file contains illegal character.");
return (char) from_file;
}
case INPUTSTATE_SKIPBLANKS:
// read until non-blank, then deliver that
do {
from_file = getc(Input_now->src.fd);
IF_WANTED_REPORT_SRCCHAR(from_file);
} while ((from_file == CHAR_TAB) || (from_file == ' '));
// re-process last byte
Input_now->state = INPUTSTATE_AGAIN;
break;
case INPUTSTATE_LF:
// return start-of-line, then continue in normal mode
Input_now->state = INPUTSTATE_NORMAL;
return CHAR_SOL; // new line
case INPUTSTATE_CR:
// return start-of-line, remember to check for LF
Input_now->state = INPUTSTATE_SKIPLF;
return CHAR_SOL; // new line
case INPUTSTATE_SKIPLF:
from_file = getc(Input_now->src.fd);
IF_WANTED_REPORT_SRCCHAR(from_file);
// if LF, ignore it and fetch another byte
// otherwise, process current byte
if (from_file == CHAR_LF)
Input_now->state = INPUTSTATE_NORMAL;
else
Input_now->state = INPUTSTATE_AGAIN;
break;
case INPUTSTATE_COMMENT:
// read until end-of-line or end-of-file
do {
from_file = getc(Input_now->src.fd);
IF_WANTED_REPORT_SRCCHAR(from_file);
} while ((from_file != EOF) && (from_file != CHAR_CR) && (from_file != CHAR_LF));
// re-process last byte
Input_now->state = INPUTSTATE_AGAIN;
break;
case INPUTSTATE_EOB:
// deliver EOB
Input_now->state = INPUTSTATE_NORMAL;
return CHAR_EOB; // end of block
case INPUTSTATE_EOF:
// deliver EOF
Input_now->state = INPUTSTATE_NORMAL;
return CHAR_EOF; // end of file
default:
// NOTE(review): if Bug_found() returns, the loop just runs again with
// the same state - presumably it does not return; confirm
Bug_found("StrangeInputMode", Input_now->state);
}
}
}
// Fetch the next byte from the currently active byte source, store it in
// GotByte and return it (shortened high-level format). FIXME - use fn ptr?
// When inside quotes, use GetQuotedByte() instead!
char GetByte(void)
{
	// Source held in RAM already has high-level format, so it can be
	// read as is. A file has to go through get_processed_from_file(),
	// which does the conversion.
	GotByte = Input_now->source_is_ram
		? *(Input_now->src.ram_ptr++)
		: get_processed_from_file();
	// a start-of-line marker means the line counter must be advanced
	if (GotByte == CHAR_SOL)
		++Input_now->line_number;
	return GotByte;
}
// Fetch the next byte from the currently active byte source in un-shortened
// high-level format (for use inside quotes), store it in GotByte and return it.
// Reading CHAR_EOS (end of statement) here is reported as an error.
char GetQuotedByte(void)
{
	int	raw;	// int instead of char, so EOF can be told apart

	if (!Input_now->source_is_ram) {
		// source is a file: fetch a fresh byte
		raw = getc(Input_now->src.fd);
		IF_WANTED_REPORT_SRCCHAR(raw);
		if (raw == EOF) {
			// end of statement now, end-of-file later
			Input_now->state = INPUTSTATE_EOF;
			GotByte = CHAR_EOS;
		} else if (raw == CHAR_LF) {
			// end of statement now, start-of-line later
			Input_now->state = INPUTSTATE_LF;
			GotByte = CHAR_EOS;
		} else if (raw == CHAR_CR) {
			// end of statement now, CRLF check and start-of-line later
			Input_now->state = INPUTSTATE_CR;
			GotByte = CHAR_EOS;
		} else {
			// everything else is passed on unchanged
			GotByte = raw;
		}
	} else {
		// source is RAM: already in high-level format, no conversion
		GotByte = *(Input_now->src.ram_ptr++);
	}
	// the statement must not end while quotes are open
	if (GotByte == CHAR_EOS)
		Throw_error("Quotes still open at end of line.");
	return GotByte;
}
// Throw away whatever is left of the current statement (e.g. after an error).
void Input_skip_remainder(void)
{
	// keep reading until GotByte is zero, i.e. until end-of-statement
	for (;;) {
		if (!GotByte)
			break;
		GetByte();
	}
}
// Make sure nothing but an optional blank is left of the current statement,
// for example after mnemonics using implied addressing. Anything else is
// reported as an error and then skipped.
void Input_ensure_EOS(void)	// Now GotByte = first char to test
{
	SKIPSPACE();
	if (!GotByte)
		return;	// statement really ends here
	Throw_error("Garbage data at end of statement.");
	Input_skip_remainder();
}
// Skip or store a block. Reading starts with the next byte, so call this
// directly after the opening brace has been read.
// "store" TRUE:  the block is collected in GlobalDynaBuf; a copy of it is
//                made and the pointer to that copy is returned.
// "store" FALSE: the block is only skipped and NULL is returned.
// Afterwards GotByte holds '}'. Unless EOF was found first, but then a
// serious error would have been thrown.
// FIXME - use a struct block *ptr argument!
char *Input_skip_or_store_block(int store)
{
	int	nesting	= 1;	// number of blocks still waiting for their end
	char	current;

	// start with an empty global dynamic buffer
	DYNABUF_CLEAR(GlobalDynaBuf);
	while (nesting != 0) {
		current = GetByte();
		if (store)
			DYNABUF_APPEND(GlobalDynaBuf, current);
		// some characters need special treatment
		switch (current) {
		case CHAR_EOF:	// End-of-file in block? Sorry, no way.
			// (no break here, just like before: presumably
			// Throw_serious_error() does not return)
			Throw_serious_error(exception_no_right_brace);
		case '"':	// quoted stuff is read up to the matching quote
		case '\'':	// (or the end of the statement)
			for (;;) {
				GetQuotedByte();
				if (store)
					DYNABUF_APPEND(GlobalDynaBuf, GotByte);
				if ((GotByte == CHAR_EOS) || (GotByte == current))
					break;
			}
			break;
		case CHAR_SOB:	// nested block begins
			++nesting;
			break;
		case CHAR_EOB:	// innermost block ends
			--nesting;
			break;
		}
	}
	if (store) {
		// add EOS and EOF, just to make sure block is never read too far
		DynaBuf_append(GlobalDynaBuf, CHAR_EOS);
		DynaBuf_append(GlobalDynaBuf, CHAR_EOF);
		// hand out a copy of the collected block
		return DynaBuf_get_copy(GlobalDynaBuf);
	}
	// block was only skipped
	return NULL;
}
// Starting with GotByte, append bytes to GlobalDynaBuf until the given
// terminator (or CHAR_EOS) shows up; the terminating byte is not appended.
// Single and double quotes switch to quote mode, so a terminator inside
// quotes does not terminate.
void Input_until_terminator(char terminator)
{
	char	current;

	for (current = GotByte;
	     (current != terminator) && (current != CHAR_EOS);
	     current = GetByte()) {
		DYNABUF_APPEND(GlobalDynaBuf, current);
		// anything but a quote character needs no further attention
		if ((current != '"') && (current != '\''))
			continue;
		// copy quoted stuff up to the matching quote
		do {
			GetQuotedByte();	// throws error on EOS
			DYNABUF_APPEND(GlobalDynaBuf, GotByte);
		} while ((GotByte != CHAR_EOS) && (GotByte != current));
		// quotes were not closed: leave before GetByte() is called
		if (GotByte != current)
			return;
	}
}
// Starting with GotByte, append characters to GlobalDynaBuf for as long as
// they are legal inside keywords.
// Throws "missing string" error if there was not a single one.
// Returns the number of characters appended.
int Input_append_keyword_to_global_dynabuf(void)
{
	int	count;

	// stop at the first character that cannot continue a keyword
	for (count = 0; BYTEFLAGS(GotByte) & CONTS_KEYWORD; ++count) {
		DYNABUF_APPEND(GlobalDynaBuf, GotByte);
		GetByte();
	}
	if (!count)
		Throw_error(exception_missing_string);
	return count;
}
// Determine the scope from GotByte, then read the keyword itself:
// LOCAL_PREFIX ('.') selects the current section's local scope,
// CHEAP_PREFIX ('@') selects the current section's cheap scope,
// anything else selects the global scope.
// A prefix character is consumed before the keyword is read.
// The scope is stored via "scope"; the return value is the keyword length
// as returned by Input_read_keyword().
int Input_read_scope_and_keyword(scope_t *scope)
{
	int	is_local;

	SKIPSPACE();
	if ((GotByte != LOCAL_PREFIX) && (GotByte != CHEAP_PREFIX)) {
		// no prefix, nothing to consume
		*scope = SCOPE_GLOBAL;
	} else {
		// remember which prefix it was, then step over it
		is_local = (GotByte == LOCAL_PREFIX);
		GetByte();
		*scope = is_local ? section_now->local_scope : section_now->cheap_scope;
	}
	return Input_read_keyword();
}
// Read a keyword into the (cleared) GlobalDynaBuf: characters are taken as
// long as they are legal for keywords, then the string is zero-terminated.
// Returns the keyword's length, not counting the terminator.
// A length of zero produces a "missing string" error.
int Input_read_keyword(void)
{
	int	keyword_length;

	DYNABUF_CLEAR(GlobalDynaBuf);
	keyword_length = Input_append_keyword_to_global_dynabuf();
	// the terminator goes into the buffer but is not part of the result
	DynaBuf_append(GlobalDynaBuf, '\0');
	return keyword_length;
}
// Read a keyword into the (cleared) GlobalDynaBuf, zero-terminate it and
// convert it to lower case.
// Returns the keyword's length, not counting the terminator.
// A length of zero produces a "missing string" error.
int Input_read_and_lower_keyword(void)
{
	// clearing, reading and terminating is the same as for plain keywords
	int	keyword_length	= Input_read_keyword();

	// convert in place
	DynaBuf_to_lower(GlobalDynaBuf, GlobalDynaBuf);
	return keyword_length;
}
// Try to read a file name, quoted as "..." or as <...>.
// The <...> form means library access: the platform's library prefix is put
// in front of the name. It is only accepted if "allow_library" is TRUE.
// If "uses_lib" is non-NULL, 1 (library) or 0 (no library) is stored there.
// Characters of the name are converted from UNIX style to platform style.
// Returns TRUE on error, FALSE on success; the zero-terminated file name is
// then in GlobalDynaBuf.
// Errors are reported here, but the caller should call
// Input_skip_remainder() afterwards.
int Input_read_filename(int allow_library, int *uses_lib)
{
	char	*lib_prefix,
		end_quote;
	int	wants_library;

	DYNABUF_CLEAR(GlobalDynaBuf);
	SKIPSPACE();
	// tell caller about library access, even if it gets refused below
	wants_library = (GotByte == '<');
	if (uses_lib)
		*uses_lib = wants_library;
	if (wants_library) {
		// complain if library access is forbidden
		if (allow_library == FALSE) {
			Throw_error("Writing to library not supported.");
			return TRUE;
		}
		// the platform's lib prefix goes in front of the name
		lib_prefix = PLATFORM_LIBPREFIX;
#ifndef NO_NEED_FOR_ENV_VAR
		// complain if lib prefix is not set
		if (lib_prefix == NULL) {
			Throw_error("\"ACME\" environment variable not found.");
			return TRUE;
		}
#endif
		DynaBuf_add_string(GlobalDynaBuf, lib_prefix);
		end_quote = '>';
	} else if (GotByte == '"') {
		end_quote = '"';
	} else {
		Throw_error("File name quotes not found (\"\" or <>).");
		return TRUE;
	}
	// the first character must not be the closing quote already
	if (GetQuotedByte() == end_quote) {
		Throw_error("No file name given.");
		return TRUE;
	}
	// collect platform-converted characters up to the closing quote
	// (or the end of the statement)
	for (; !((GotByte == CHAR_EOS) || (GotByte == end_quote)); GetQuotedByte())
		DYNABUF_APPEND(GlobalDynaBuf, PLATFORM_CONVERTPATHCHAR(GotByte));
	// statement ended before the closing quote: error
	if (GotByte == CHAR_EOS)
		return TRUE;
	GetByte();	// fetch next to forget closing quote
	DynaBuf_append(GlobalDynaBuf, '\0');	// terminate string
	return FALSE;	// no error
}
// Check for a comma, skipping spaces before and after it.
// Returns TRUE if a comma was found (and consumed), otherwise FALSE.
int Input_accept_comma(void)
{
	SKIPSPACE();
	if (GotByte == ',') {
		NEXTANDSKIPSPACE();
		return TRUE;
	}
	return FALSE;
}
// Read optional info about parameter length: a '+' followed by '1', '2' or
// '3' selects MVALUE_FORCE08, MVALUE_FORCE16 or MVALUE_FORCE24.
// Returns the selected value, or 0 if there was no (valid) postfix.
int Input_get_force_bit(void)
{
	int	force_bit	= 0;

	if (GotByte == '+') {
		switch (GetByte()) {
		case '1':
			force_bit = MVALUE_FORCE08;
			break;
		case '2':
			force_bit = MVALUE_FORCE16;
			break;
		case '3':
			force_bit = MVALUE_FORCE24;
			break;
		default:
			break;
		}
		// a valid digit is consumed, anything else is an error
		if (force_bit)
			GetByte();
		else
			Throw_error("Illegal postfix.");
	}
	SKIPSPACE();
	return force_bit;
}
// include path stuff - should be moved to its own file:
// ring list struct
// (doubly linked; one entry per include path)
struct ipi {
struct ipi *next, // following entry in ring
*prev; // preceding entry in ring
const char *path; // search path; the pointer is stored as given, the string is not copied
};
// An empty ring is the head element pointing at itself in both directions.
static struct ipi ipi_head; // head element
static struct dynabuf *pathbuf; // buffer to combine search path and file spec
// Set up the include path list: create the buffer used for combining search
// path and file name, and make the ring list empty.
void includepaths_init(void)
{
	pathbuf = DynaBuf_create(256);
	// in an empty ring, the head element is its own neighbour
	ipi_head.prev = ipi_head.next = &ipi_head;
}
// Append an include path to the end of the ring list.
// Only the pointer is stored, the string itself is not copied.
void includepaths_add(const char *path)
{
	struct ipi	*last	= ipi_head.prev;	// entry that is last so far
	struct ipi	*entry	= safe_malloc(sizeof(*entry));

	entry->path = path;
	// link new entry in between the last one and the head element
	entry->prev = last;
	entry->next = &ipi_head;
	ipi_head.prev = entry;
	last->next = entry;
}
// Open a file for reading. The file name is expected in GlobalDynaBuf.
// The name is tried as it is first. If that fails and "uses_lib" is zero,
// the include paths are tried as prefixes, in list order, until one works.
// Returns the stream; if the file could not be opened at all, an error is
// thrown and NULL is returned.
FILE *includepaths_open_ro(int uses_lib)
{
	struct ipi	*entry;
	FILE		*handle;

	// first try directly, regardless of whether lib or not
	handle = fopen(GLOBALDYNABUF_CURRENT, FILE_READBINARY);
	if ((handle == NULL) && !uses_lib) {
		// walk the ring until a path works or the head is reached again
		entry = ipi_head.next;
		while ((handle == NULL) && (entry != &ipi_head)) {
			// start with the search path...
			DYNABUF_CLEAR(pathbuf);
			DynaBuf_add_string(pathbuf, entry->path);
			// ...make sure it ends with a directory separator
			// (if the platform has one and the path is not empty)...
			if (DIRECTORY_SEPARATOR
			&& pathbuf->size
			&& (pathbuf->buffer[pathbuf->size - 1] != DIRECTORY_SEPARATOR))
				DynaBuf_append(pathbuf, DIRECTORY_SEPARATOR);
			// ...then add the file name and terminate the string
			DynaBuf_add_string(pathbuf, GLOBALDYNABUF_CURRENT);
			DynaBuf_append(pathbuf, '\0');
			handle = fopen(pathbuf->buffer, FILE_READBINARY);
			entry = entry->next;
		}
	}
	if (handle == NULL)
		Throw_error(exception_cannot_open_input_file);
	return handle;
}