acme/src/input.c

525 lines
15 KiB
C
Raw Normal View History

// ACME - a crossassembler for producing 6502/65c02/65816 code.
// Copyright (C) 1998-2009 Marco Baye
// Have a look at "acme.c" for further info
//
// Input stuff
#include "config.h"
#include "alu.h"
#include "dynabuf.h"
#include "global.h"
#include "input.h"
#include "platform.h"
#include "section.h"
#include "tree.h"
// Constants
// stdio mode string: open a file for reading in binary mode
const char FILE_READBINARY[] = "rb";
// Raw byte values as they appear in source files. Note that the values 10
// and 13 get a different meaning once the input has been converted to
// "high-level format" (see the notes below each definition).
#define CHAR_TAB (9) // Tab character
#define CHAR_LF (10) // line feed (in file)
// (10) // start of line (in high-level format)
#define CHAR_CR (13) // carriage return (in file)
// (13) // end of file (in high-level format)
// Characters with a special meaning in assembler source code:
// ':' ends a statement, so several statements can share one line
#define CHAR_STATEMENT_DELIMITER ':'
// ';' starts a comment, the remainder of the line is ignored
#define CHAR_COMMENT_SEPARATOR ';'
// if the characters above are changed, don't forget to adjust ByteFlags[]!
// (NOTE(review): presumably the table behind the BYTEFLAGS() macro - confirm)
// fake input structure (for error msgs before any real input is established)
static struct input_t outermost = {
"<none>", // file name
0, // line number
FALSE, // Faked file access, so no RAM read
INPUTSTATE_EOF, // state of input
{
NULL // RAM read pointer or file handle
}
};
// Variables
// Pointer to the input structure all functions in this file read from and
// update. Initially it refers to the fake "outermost" structure.
struct input_t *Input_now = &outermost; // current input structure
// End of source file ("!endoffile" or "!eof")
// Handler for both pseudo opcodes: checks that the statement holds nothing
// else, then switches the current input to end-of-file state.
// Returns AT_EOS_ANYWAY.
static enum eos_t PO_eof(void)
{
// Well, it doesn't end right here and now, but at end-of-line! :-)
// (the check below must come first, because it may read further input and
// reading from a file changes the input state)
Input_ensure_EOS();
// from now on, act as if the end of the source had been reached
Input_now->state = INPUTSTATE_EOF;
return AT_EOS_ANYWAY;
}
// predefined stuff
// Table of the pseudo opcodes implemented in this file. Both keywords are
// handled by PO_eof(). The table gets registered by Input_init().
static struct node_t pseudo_opcodes[] = {
PREDEFNODE("eof", PO_eof),
PREDEFLAST("endoffile", PO_eof),
// ^^^^ this marks the last element
};
// Functions
// Module initialisation: add this file's pseudo opcodes ("eof" and
// "endoffile") to the pseudo opcode tree.
void Input_init(void)
{
	Tree_add_table(&pseudo_opcode_tree, pseudo_opcodes);
}
// Let current input point to start of file
void Input_new_file(const char *filename, FILE *fd)
{
Input_now->original_filename = filename;
Input_now->line_number = 1;
Input_now->source_is_ram = FALSE;
Input_now->state = INPUTSTATE_NORMAL;
Input_now->src.fd = fd;
}
// Deliver source code from current file (!) in shortened high-level format
// Raw bytes are read from Input_now->src.fd using getc() and converted on the
// fly. Input_now->state remembers between calls what has to be done next.
// The conversions are:
//	- a run of TABs and spaces is delivered as a single ' '
//	- LF, CR and CR+LF are delivered as CHAR_EOS followed by CHAR_SOL
//	- CHAR_STATEMENT_DELIMITER is delivered as CHAR_EOS
//	- CHAR_COMMENT_SEPARATOR is delivered as CHAR_EOS, the remainder of
//	  the line is then skipped
//	- CHAR_EOB is delivered as CHAR_EOS followed by CHAR_EOB
//	- end of file is delivered as CHAR_EOS followed by CHAR_EOF
//	- any other byte flagged as BYTEIS_SYNTAX causes an error and is
//	  delivered unchanged
//	- all other bytes are delivered unchanged
// Returns the next character of the converted stream.
static char get_processed_from_file(void)
{
// latest byte read from file; must be an int so EOF can be recognised
int from_file = 0;
// loop until one of the states below returns a character
for (;;) {
switch (Input_now->state) {
case INPUTSTATE_NORMAL:
// fetch a fresh byte from the current source file
from_file = getc(Input_now->src.fd);
// now process it
/*FALLTHROUGH*/
case INPUTSTATE_AGAIN:
// Process the latest byte again. This state is only meant to be
// entered from the states further below, which read a byte
// into "from_file" and then "break" to run the loop once more.
// "from_file" is a local variable initialised to 0, so if a
// call ever started out in this state, a zero byte would get
// processed instead of real input. Keep that in mind when
// adding new ways to enter INPUTSTATE_AGAIN.
Input_now->state = INPUTSTATE_NORMAL;
// EOF must be checked first because it cannot be used
// as an index into Byte_flags[]
if (from_file == EOF) {
// remember to send an end-of-file
Input_now->state = INPUTSTATE_EOF;
return CHAR_EOS; // end of statement
}
// check whether character is special one
// if not, there is nothing to convert, so return it
if ((BYTEFLAGS(from_file) & BYTEIS_SYNTAX) == 0)
return (char) from_file;
// check special characters ("0x00 TAB LF CR SPC :;}")
switch (from_file) {
case CHAR_TAB: // TAB character
case ' ':
// deliver a single space now and
// remember to skip all following blanks
Input_now->state = INPUTSTATE_SKIPBLANKS;
return ' ';
case CHAR_LF: // LF character
// remember to send a start-of-line
Input_now->state = INPUTSTATE_LF;
return CHAR_EOS; // end of statement
case CHAR_CR: // CR character
// remember to check CRLF + send start-of-line
Input_now->state = INPUTSTATE_CR;
return CHAR_EOS; // end of statement
case CHAR_EOB:
// remember to send an end-of-block
Input_now->state = INPUTSTATE_EOB;
return CHAR_EOS; // end of statement
case CHAR_STATEMENT_DELIMITER:
// just deliver an EOS instead
return CHAR_EOS; // end of statement
case CHAR_COMMENT_SEPARATOR:
// remember to skip remainder of line
Input_now->state = INPUTSTATE_COMMENT;
return CHAR_EOS; // end of statement
default:
// complain if byte is 0 (or any other byte flagged as
// BYTEIS_SYNTAX that is not handled above), then
// deliver it anyway
Throw_error("Source file contains illegal character.");
return (char) from_file;
}
case INPUTSTATE_SKIPBLANKS:
// read until non-blank, then deliver that
do
from_file = getc(Input_now->src.fd);
while ((from_file == CHAR_TAB) || (from_file == ' '));
// re-process last byte
Input_now->state = INPUTSTATE_AGAIN;
break;
case INPUTSTATE_LF:
// return start-of-line, then continue in normal mode
Input_now->state = INPUTSTATE_NORMAL;
return CHAR_SOL; // new line
case INPUTSTATE_CR:
// return start-of-line, remember to check for LF
Input_now->state = INPUTSTATE_SKIPLF;
return CHAR_SOL; // new line
case INPUTSTATE_SKIPLF:
// a CR has been handled, so a directly following LF belongs to
// the same line ending
from_file = getc(Input_now->src.fd);
// if LF, ignore it and fetch another byte
// otherwise, process current byte
if (from_file == CHAR_LF)
Input_now->state = INPUTSTATE_NORMAL;
else
Input_now->state = INPUTSTATE_AGAIN;
break;
case INPUTSTATE_COMMENT:
// read until end-of-line or end-of-file
do
from_file = getc(Input_now->src.fd);
while ((from_file != EOF) && (from_file != CHAR_CR) && (from_file != CHAR_LF));
// re-process last byte, so the line ending (or EOF) gets
// handled just like outside of comments
Input_now->state = INPUTSTATE_AGAIN;
break;
case INPUTSTATE_EOB:
// deliver EOB
Input_now->state = INPUTSTATE_NORMAL;
return CHAR_EOB; // end of block
case INPUTSTATE_EOF:
// deliver EOF
Input_now->state = INPUTSTATE_NORMAL;
return CHAR_EOF; // end of file
default:
// unknown state: report an internal error
// (if Bug_found() returned, the loop would just run again with
// the same state)
Bug_found("StrangeInputMode", Input_now->state);
}
}
}
// Deliver the next byte from the currently active byte source in shortened
// high-level format. The byte is stored in GotByte and returned as well.
// Every start-of-line character delivered increments the current input's
// line counter.
// FIXME - use fn ptr?
// When inside quotes, use GetQuotedByte() instead!
char GetByte(void)
{
	if (!Input_now->source_is_ram) {
		// Source is a file, so the raw bytes must be converted to
		// high-level format first.
		GotByte = get_processed_from_file();
	} else {
		// Source is RAM, which already holds high-level format, so
		// no conversion is needed.
		GotByte = *(Input_now->src.ram_ptr++);
	}
	// keep track of line number
	if (GotByte == CHAR_SOL)
		Input_now->line_number++;
	return GotByte;
}
// Deliver the next byte from the currently active byte source in
// un-shortened high-level format (for use inside quotes: blanks, ':' and ';'
// are delivered unchanged). The byte is stored in GotByte and returned.
// An error is thrown if the result is CHAR_EOS (end of statement), because
// that means the line or file has ended while the quotes were still open.
char GetQuotedByte(void)
{
	if (Input_now->source_is_ram) {
		// RAM already holds high-level format, so just fetch the byte
		GotByte = *(Input_now->src.ram_ptr++);
	} else {
		// source is a file: fetch a raw byte
		// (must be an int to tell EOF from data)
		int	raw = getc(Input_now->src.fd);

		if (raw == EOF) {
			// end statement now, deliver end-of-file afterward
			Input_now->state = INPUTSTATE_EOF;
			GotByte = CHAR_EOS;
		} else if (raw == CHAR_LF) {
			// end statement now, deliver start-of-line afterward
			Input_now->state = INPUTSTATE_LF;
			GotByte = CHAR_EOS;
		} else if (raw == CHAR_CR) {
			// end statement now, afterward deliver start-of-line
			// and check for CRLF
			Input_now->state = INPUTSTATE_CR;
			GotByte = CHAR_EOS;
		} else {
			// anything else is taken literally
			GotByte = raw;
		}
	}
	// statements must not end inside quotes
	if (GotByte == CHAR_EOS)
		Throw_error("Quotes still open at end of line.");
	return GotByte;
}
// Throw away the rest of the current statement (used after errors, for
// example). Does nothing if GotByte is zero already.
void Input_skip_remainder(void)
{
	// keep reading until GetByte() has delivered a zero byte
	// (end-of-statement)
	while (GotByte != 0) {
		GetByte();
	}
}
// Make sure nothing but space follows in the current statement (needed after
// mnemonics using implied addressing, for example). On entry, GotByte must
// hold the first character to check. If there is anything left, an error is
// thrown and the remainder of the statement is skipped.
void Input_ensure_EOS(void)
{
	SKIPSPACE();
	if (!GotByte)
		return;	// statement has ended, all is well

	Throw_error("Garbage data at end of statement.");
	Input_skip_remainder();
}
// Skip or store a block of source code. Reading starts with the next byte,
// so call this directly after the opening brace has been read.
// Nested blocks are handled by counting braces; quoted text is read using
// GetQuotedByte(), so braces inside quotes do not count.
//	store	TRUE: collect the block in GlobalDynaBuf and return a pointer
//		to a copy of it (terminated by CHAR_EOS and CHAR_EOF)
//		FALSE: just skip the block and return NULL
// Afterward, GotByte holds '}'. Finding end-of-file before the matching brace
// results in a serious error.
char *Input_skip_or_store_block(int store)
{
	char	current;
	int	nesting = 1;	// number of blocks still open

	// start with an empty buffer
	DYNABUF_CLEAR(GlobalDynaBuf);
	while (nesting != 0) {
		current = GetByte();
		if (store) {
			DYNABUF_APPEND(GlobalDynaBuf, current);
		}
		// some characters need special treatment
		switch (current) {
		case CHAR_EOF:
			// file must not end inside of block
			// NOTE(review): presumably this call does not return;
			// if it did, control would continue with the quote
			// handling below.
			Throw_serious_error(exception_no_right_brace);
		case '"':
		case '\'':
			// read quoted text up to the matching quote character
			// (or until GetQuotedByte() reports end of statement)
			for (;;) {
				GetQuotedByte();
				if (store) {
					DYNABUF_APPEND(GlobalDynaBuf, GotByte);
				}
				if ((GotByte == CHAR_EOS) || (GotByte == current))
					break;
			}
			break;
		case CHAR_SOB:
			nesting++;	// inner block starts
			break;
		case CHAR_EOB:
			nesting--;	// block ends
			break;
		}
	}
	if (store) {
		// terminate the block with an end-of-statement and an
		// end-of-file, so it is never read too far
		DynaBuf_append(GlobalDynaBuf, CHAR_EOS);
		DynaBuf_append(GlobalDynaBuf, CHAR_EOF);
		// hand out a copy of the buffer contents
		return DynaBuf_get_copy(GlobalDynaBuf);
	}
	// block was only skipped, so there is nothing to return
	return NULL;
}
// Append bytes to GlobalDynaBuf, beginning with the current GotByte, until
// the given terminator or CHAR_EOS shows up (neither of which is appended).
// Single and double quotes switch to quote mode, so a terminator inside of
// quotes does not end the sequence. If a quoted part is not closed properly,
// the function returns right after GetQuotedByte() has thrown its error.
void Input_until_terminator(char terminator)
{
	char	current;

	for (current = GotByte; (current != terminator) && (current != CHAR_EOS); current = GetByte()) {
		DYNABUF_APPEND(GlobalDynaBuf, current);
		// anything but a quote character needs no further attention
		if ((current != '"') && (current != '\''))
			continue;

		// copy quoted text, including the closing quote
		do {
			GetQuotedByte();	// throws error on EOS
			DYNABUF_APPEND(GlobalDynaBuf, GotByte);
		} while ((GotByte != CHAR_EOS) && (GotByte != current));
		// if the quotes were left open, stop before reading any further
		if (GotByte != current)
			return;
	}
}
// Append characters to GlobalDynaBuf for as long as they are legal inside
// keywords, starting with the current GotByte. The buffer is neither cleared
// before nor terminated afterward.
// Returns the number of characters appended; if that is zero, a "missing
// string" error is thrown.
int Input_append_keyword_to_global_dynabuf(void)
{
	int	count;

	// stop at the first character that cannot be part of a keyword
	for (count = 0; BYTEFLAGS(GotByte) & CONTS_KEYWORD; count++) {
		DYNABUF_APPEND(GlobalDynaBuf, GotByte);
		GetByte();
	}
	if (!count)
		Throw_error(exception_missing_string);
	return count;
}
// Read a keyword that may be preceded by LOCAL_PREFIX (default '.').
// Without the prefix, the global zone value is stored in *zone.
// With the prefix, the prefix is skipped and the zone value of the current
// section is stored in *zone.
// The keyword itself is then read by Input_read_keyword(), and the length
// returned by that function is passed on to the caller.
int Input_read_zone_and_keyword(zone_t *zone)
{
	SKIPSPACE();
	if (GotByte != LOCAL_PREFIX) {
		*zone = ZONE_GLOBAL;
	} else {
		GetByte();	// forget the prefix character
		*zone = Section_now->zone;
	}
	return Input_read_keyword();
}
// Read a keyword into GlobalDynaBuf: the buffer is cleared, filled until a
// character shows up that is not legal in keywords, and then
// zero-terminated.
// Returns the keyword's length (terminator not included). A length of zero
// causes a "missing string" error.
int Input_read_keyword(void)
{
	int	keyword_length;

	DYNABUF_CLEAR(GlobalDynaBuf);
	keyword_length = Input_append_keyword_to_global_dynabuf();
	// terminate the string (this increments the buffer's own length counter)
	DynaBuf_append(GlobalDynaBuf, '\0');
	return keyword_length;
}
// Read a keyword into GlobalDynaBuf and convert it to lower case: the buffer
// is cleared, filled until a character shows up that is not legal in
// keywords, zero-terminated and finally converted in place.
// Returns the keyword's length (terminator not included). A length of zero
// causes a "missing string" error.
int Input_read_and_lower_keyword(void)
{
	int	keyword_length;

	DYNABUF_CLEAR(GlobalDynaBuf);
	keyword_length = Input_append_keyword_to_global_dynabuf();
	// terminate the string (this increments the buffer's own length counter)
	DynaBuf_append(GlobalDynaBuf, '\0');
	// source and target of the conversion are the same buffer
	DynaBuf_to_lower(GlobalDynaBuf, GlobalDynaBuf);
	return keyword_length;
}
// Try to read a file name into GlobalDynaBuf. Names must be given in double
// quotes; if "allow_library" is TRUE, <...> quoting is accepted as well, in
// which case the platform's library prefix is put in front of the name.
// The characters of the name are converted to platform style using
// PLATFORM_CONVERTPATHCHAR().
// Returns TRUE on error and FALSE on success. Errors are reported here, but
// the caller should call Input_skip_remainder() afterward.
int Input_read_filename(int allow_library)
{
	char	closing_quote;

	DYNABUF_CLEAR(GlobalDynaBuf);
	SKIPSPACE();
	// find out what kind of quoting is used
	if (GotByte == '<') {
		char	*lib_prefix;

		// library access - is that allowed here?
		if (allow_library == FALSE) {
			Throw_error("Writing to library not supported.");
			return TRUE;
		}
		// get platform's library prefix
		lib_prefix = PLATFORM_LIBPREFIX;
#ifndef NO_NEED_FOR_ENV_VAR
		// without a prefix, the library cannot be located
		if (lib_prefix == NULL) {
			Throw_error("\"ACME\" environment variable not found.");
			return TRUE;
		}
#endif
		// file name starts with the library path
		DynaBuf_add_string(GlobalDynaBuf, lib_prefix);
		closing_quote = '>';
	} else if (GotByte == '"') {
		closing_quote = '"';
	} else {
		Throw_error("File name quotes not found (\"\" or <>).");
		return TRUE;
	}
	// an empty pair of quotes is not a file name
	if (GetQuotedByte() == closing_quote) {
		Throw_error("No file name given.");
		return TRUE;
	}
	// collect platform-converted characters up to the closing quote
	// (or up to the end of the statement)
	for (; (GotByte != CHAR_EOS) && (GotByte != closing_quote); GetQuotedByte()) {
		DYNABUF_APPEND(GlobalDynaBuf, PLATFORM_CONVERTPATHCHAR(GotByte));
	}
	// closing quote missing? GetQuotedByte() has complained already
	if (GotByte == CHAR_EOS)
		return TRUE;

	GetByte();	// proceed beyond the closing quote
	DynaBuf_append(GlobalDynaBuf, '\0');	// terminate file name
	return FALSE;	// success
}
// Check for a comma, allowing for space in front of it. If there is one, it
// is consumed together with any space following it.
// Returns TRUE if a comma was found, otherwise FALSE.
int Input_accept_comma(void)
{
	SKIPSPACE();
	if (GotByte == ',') {
		NEXTANDSKIPSPACE();
		return TRUE;
	}
	return FALSE;
}
// read optional info about parameter length
// Read the optional postfix giving the parameter length: "+1", "+2" or "+3"
// select MVALUE_FORCE08, MVALUE_FORCE16 or MVALUE_FORCE24, respectively.
// A '+' followed by anything else results in an "Illegal postfix." error.
// Space after the postfix (or in its place) is skipped.
// Returns the selected force bit, or zero if there was no valid postfix.
int Input_get_force_bit(void)
{
	int	force_bit = 0;

	if (GotByte == '+') {
		// the character after the '+' selects the size
		switch (GetByte()) {
		case '1':
			force_bit = MVALUE_FORCE08;
			break;
		case '2':
			force_bit = MVALUE_FORCE16;
			break;
		case '3':
			force_bit = MVALUE_FORCE24;
			break;
		default:
			break;
		}
		if (force_bit)
			GetByte();	// proceed beyond the digit
		else
			Throw_error("Illegal postfix.");
	}
	SKIPSPACE();
	return force_bit;
}