acme/src/global.c

589 lines
19 KiB
C

// ACME - a crossassembler for producing 6502/65c02/65816/65ce02 code.
// Copyright (C) 1998-2020 Marco Baye
// Have a look at "acme.c" for further info
//
// Global stuff - things that are needed by several modules
// 4 Oct 2006 Fixed a typo in a comment
// 22 Nov 2007 Added warn_on_indented_labels
// 2 Jun 2014 Added warn_on_old_for and warn_on_type_mismatch
// 19 Nov 2014 Merged Johann Klasek's report listing generator patch
// 23 Nov 2014 Merged Martin Piper's "--msvc" error output patch
// 9 Jan 2018 Made '/' a syntax char to allow for "//" comments
// 14 Apr 2020 Added config vars for "ignore zeroes" and "segment warnings to errors"
#include "global.h"
#include <stdio.h>
#include "platform.h"
#include "acme.h"
#include "alu.h"
#include "cpu.h"
#include "dynabuf.h"
#include "encoding.h"
#include "input.h"
#include "macro.h"
#include "output.h"
#include "pseudoopcodes.h"
#include "section.h"
#include "symbol.h"
#include "tree.h"
#include "typesystem.h"
// constants
// NOTE(review): presumably the fallback title for things that have no name
// of their own - confirm against the users of this string.
char s_untitled[] = "<untitled>"; // FIXME - this is actually const
// Exception messages during assembly
// These are the message strings handed to Throw_error() and
// Throw_serious_error(). The ones without "static" are visible to other
// modules; the 16-bit and 24-bit range messages are only used by the
// output_*16() and output_*24() functions in this file.
const char exception_missing_string[] = "No string given.";
const char exception_negative_size[] = "Negative size argument.";
const char exception_no_left_brace[] = "Missing '{'.";
const char exception_no_memory_left[] = "Out of memory.";
const char exception_no_right_brace[] = "Found end-of-file instead of '}'.";
//const char exception_not_yet[] = "Sorry, feature not yet implemented.";
// TODO - show actual value in error message
const char exception_number_out_of_range[] = "Number out of range.";
const char exception_number_out_of_8b_range[] = "Number does not fit in 8 bits.";
static const char exception_number_out_of_16b_range[] = "Number does not fit in 16 bits.";
static const char exception_number_out_of_24b_range[] = "Number does not fit in 24 bits.";
const char exception_pc_undefined[] = "Program counter undefined.";
const char exception_symbol_defined[] = "Symbol already defined.";
const char exception_syntax[] = "Syntax error.";
// default value for number of errors before exiting
// (config_default() stores this in "max_errors"; Throw_error() terminates the
// program once the error count of the pass reaches that limit)
#define MAXERRORS 10
// Flag table:
// This table contains flags for all the 256 possible byte values. The
// assembler reads the table whenever it needs to know whether a byte is
// allowed to be in a label name, for example.
// The table is indexed by the byte value; each row below holds the flags of
// eight consecutive byte values.
// Bits Meaning when set
// 7....... Byte allowed to start keyword
// .6...... Byte allowed in keyword
// ..5..... Byte is upper case, can be lowercased by OR-ing this bit(!)
// ...4.... special character for input syntax: 0x00 TAB LF CR SPC / : ; }
// ....3... preceding sequence of '-' characters is anonymous backward
// label. Currently only set for ')', ',' and CHAR_EOS.
// .....210 currently unused
// Resulting values used below:
// 0xe0 = upper case letter (may start keyword, allowed in keyword, bit 5 set)
// 0xc0 = may start keyword and is allowed in keyword (lower case letters,
//        '_' and all bytes from 0x80 upwards)
// 0x40 = allowed in keyword, but cannot start one (digits)
// 0x10 = syntax character, 0x08 = ends a '-' sequence, 0x18 = both (0x00)
// NOTE(review): values such as 0xc0 and 0xe0 do not fit into a signed char;
// presumably the macros reading this table cope with that - confirm in global.h
const char global_byte_flags[256] = {
/*$00*/ 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,// control characters
0x00, 0x10, 0x10, 0x00, 0x00, 0x10, 0x00, 0x00,// $09 TAB, $0a LF and $0d CR are syntax characters
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/*$20*/ 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,// " !"#$%&'"
0x00, 0x08, 0x00, 0x00, 0x08, 0x00, 0x00, 0x10,// "()*+,-./"
0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,// "01234567"
0x40, 0x40, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00,// "89:;<=>?"
/*$40*/ 0x00, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,// "@ABCDEFG"
0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,// "HIJKLMNO"
0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,// "PQRSTUVW"
0xe0, 0xe0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0xc0,// "XYZ[\]^_"
/*$60*/ 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,// "`abcdefg"
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,// "hijklmno"
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,// "pqrstuvw"
0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x10, 0x00, 0x00,// "xyz{|}~" DEL ($7f)
/*$80*/ 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,// umlauts etc. ...
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
/*$a0*/ 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
/*$c0*/ 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
/*$e0*/ 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
};
// variables
// The parser functions in this file dispatch on GotByte and advance it via
// GetByte().
char GotByte; // Last byte read (processed)
// NOTE(review): presumably the state of the report listing generator (see
// changelog entry of 19 Nov 2014); starts out as NULL - confirm where it is set.
struct report *report = NULL;
// Global configuration, read throughout this file (e.g. pseudoop_prefix,
// msg_stream, format_msvc, format_color, max_errors).
struct config config;
// State of the current pass; Throw_error() counts errors in pass.error_count.
struct pass pass;
// Fill the given configuration struct with the built-in default values.
// The comments name the command line switch that changes each setting.
void config_default(struct config *conf)
{
	// source syntax and dialect
	conf->pseudoop_prefix = '!';		// can be changed to '.' by CLI switch
	conf->honor_leading_zeroes = TRUE;	// disabled by --ignore-zeroes
	conf->wanted_version = VER_CURRENT;	// changed by --dialect
	conf->test_new_features = FALSE;	// enabled by --test

	// what to warn about
	conf->warn_on_indented_labels = TRUE;	// warn if indented label is encountered
	conf->warn_on_type_mismatch = FALSE;	// use type-checking system
	conf->warn_bin_mask = 3;		// %11 -> warn if not divisible by four
	conf->segment_warning_is_error = FALSE;	// enabled by --strict-segments TODO - toggle default?

	// how and where to report
	conf->process_verbosity = 0;		// level of additional output
	conf->max_errors = MAXERRORS;		// errors before giving up
	conf->msg_stream = stderr;		// set to stdout by --use-stdout
	conf->format_msvc = FALSE;		// enabled by --msvc
	conf->format_color = FALSE;		// enabled by --color
}
// memory allocation stuff

// Allocate "size" bytes and return the new block. Never returns NULL:
// if the allocation fails, a serious error ("Out of memory.") is thrown, which
// terminates the program.
// A request for zero bytes is rounded up to one byte. malloc(0) is allowed to
// return NULL even though nothing went wrong, and that must not be mistaken
// for an out-of-memory condition.
void *safe_malloc(size_t size)
{
	void	*block;

	if (size == 0)
		size = 1;	// keeps a NULL result unambiguous
	block = malloc(size);
	if (block == NULL)
		Throw_serious_error(exception_no_memory_left);
	return block;
}
// Parser stuff

// Make sure the current statement has not had a label yet.
// On the first call for a statement, SF_IMPLIED_LABEL gets set in the given
// flags and TRUE is returned. If the flag is already set, a syntax error is
// thrown, the remainder is skipped and FALSE is returned.
static int first_label_of_statement(bits *statement_flags)
{
	if (!((*statement_flags) & SF_IMPLIED_LABEL)) {
		(*statement_flags) |= SF_IMPLIED_LABEL;	// now there has been one
		return TRUE;
	}
	// there already was a label in this statement
	Throw_error(exception_syntax);
	Input_skip_remainder();
	return FALSE;
}
// Define a label (global or local), i.e. give a symbol the value of the
// current program counter.
// The symbol name must be held in GlobalDynaBuf.
// Called by parse_symbol_definition, parse_backward_anon_def and
// parse_forward_anon_def.
//	scope		scope to look the symbol up in
//	stat_flags	statement flags (only SF_FOUND_BLANK is examined here)
//	force_bit	if non-zero, gets stored in the symbol
//	powers		passed on to symbol_set_object(); backward anons use this
//			to be allowed to change their value
static void set_label(scope_t scope, bits stat_flags, bits force_bit, bits powers)
{
	struct number	pc;
	struct object	value;
	struct symbol	*label;

	// a blank before the label means it is indented
	if (config.warn_on_indented_labels && (stat_flags & SF_FOUND_BLANK))
		Throw_first_pass_warning("Label name not in leftmost column.");

	label = symbol_find(scope);
	vcpu_read_pc(&pc);	// FIXME - if undefined, check pass.complain_about_undefined and maybe throw "value not defined"!

	// wrap the program counter into an integer object
	value.type = &type_number;
	value.u.number.ntype = NUMTYPE_INT;	// FIXME - if undefined, use NUMTYPE_UNDEFINED!
	value.u.number.flags = 0;
	value.u.number.addr_refs = pc.addr_refs;
	value.u.number.val.intval = pc.val.intval;
	symbol_set_object(label, &value, powers);

	if (force_bit)
		symbol_set_force_bit(label, force_bit);
	label->pseudopc = pseudopc_get_context();

	// global labels must open new scope for cheap locals
	if (scope == SCOPE_GLOBAL)
		section_new_cheap_scope(section_now);
}
// Parse an explicit symbol assignment ("symbol = <something>").
// Call with symbol name in GlobalDynaBuf and GotByte == '='.
//	scope		scope to look the symbol up in
//	force_bit	if non-zero, gets stored in the symbol
//	powers		is for the "!set" pseudo opcode so changes are allowed
//			(see symbol.h for powers)
void parse_assignment(scope_t scope, bits force_bit, bits powers)
{
	struct object	value;
	struct symbol	*target;

	GetByte();	// eat '='
	target = symbol_find(scope);
	ALU_any_result(&value);
	// if wanted, mark as address reference
	// FIXME - checking types explicitly is ugly...
	if (typesystem_says_address() && (value.type == &type_number))
		value.u.number.addr_refs = 1;
	symbol_set_object(target, &value, powers);
	if (force_bit)
		symbol_set_force_bit(target, force_bit);
}
// Parse a symbol definition (global or local). Whether it is an explicit
// assignment or a label is decided by the byte following the optional
// force bit.
// The symbol name must be held in GlobalDynaBuf.
static void parse_symbol_definition(scope_t scope, bits stat_flags)
{
	// skips spaces after (yes, force bit is allowed for label definitions)
	bits	force_bit = Input_get_force_bit();

	if (GotByte != '=') {
		// implicit symbol definition (label)
		set_label(scope, stat_flags, force_bit, POWER_NONE);
		return;
	}
	// explicit symbol definition (symbol = <something>)
	parse_assignment(scope, force_bit, POWER_NONE);
	Input_ensure_EOS();
}
// Read a keyword and let the current CPU type check whether it is a
// mnemonic. If it is not, treat the keyword as a global symbol definition.
static void parse_mnemo_or_global_symbol_def(bits *statement_flags)
{
	boolean	is_mnemonic;

	is_mnemonic = CPU_state.type->keyword_is_mnemonic(Input_read_keyword());
	// It is only a label if it isn't a mnemonic
	if (is_mnemonic)
		return;
	if (!first_label_of_statement(statement_flags))
		return;

	// Now GotByte = illegal char
	// 04 Jun 2005: this fix should help to explain "strange" error messages.
	// 17 May 2014: now it works for UTF-8 as well.
	// (checks for a leading 0xa0 byte or a leading 0xc2 0xa0 byte pair)
	if ((*GLOBALDYNABUF_CURRENT == (char) 0xa0)
	|| ((GlobalDynaBuf->size >= 2)
		&& (GLOBALDYNABUF_CURRENT[0] == (char) 0xc2)
		&& (GLOBALDYNABUF_CURRENT[1] == (char) 0xa0))) {
		Throw_first_pass_warning("Label name starts with a shift-space character.");
	}
	parse_symbol_definition(SCOPE_GLOBAL, *statement_flags);
}
// Parse the definition of a local or cheap local symbol; the caller passes
// the scope to use. Called with GotByte being the prefix character.
static void parse_local_symbol_def(bits *statement_flags, scope_t scope)
{
	if (first_label_of_statement(statement_flags)) {
		GetByte();	// start after '.'/'@'
		// only go on if a keyword could actually be read
		if (Input_read_keyword())
			parse_symbol_definition(scope, *statement_flags);
	}
}
// Parse the definition of an anonymous backward label.
// Called with GotByte == '-'. The label's name is the sequence of '-'
// characters itself, collected in GlobalDynaBuf.
static void parse_backward_anon_def(bits *statement_flags)
{
	if (first_label_of_statement(statement_flags)) {
		DYNABUF_CLEAR(GlobalDynaBuf);
		// collect all consecutive '-' characters
		do {
			DYNABUF_APPEND(GlobalDynaBuf, '-');
		} while (GetByte() == '-');
		DynaBuf_append(GlobalDynaBuf, '\0');
		// backward anons change their value!
		set_label(section_now->local_scope, *statement_flags, NO_FORCE_BIT, POWER_CHANGE_VALUE);
	}
}
// Parse the definition of an anonymous forward label.
// The caller has already consumed the first '+', so GotByte holds the byte
// after it; that first '+' is therefore added to the name unconditionally.
static void parse_forward_anon_def(bits *statement_flags)
{
	if (!first_label_of_statement(statement_flags))
		return;

	DYNABUF_CLEAR(GlobalDynaBuf);
	DynaBuf_append(GlobalDynaBuf, '+');	// the one the caller has eaten
	// collect any further '+' characters
	for (; GotByte == '+'; GetByte()) {
		DYNABUF_APPEND(GlobalDynaBuf, '+');
	}
	symbol_fix_forward_anon_name(TRUE);	// TRUE: increment counter
	DynaBuf_append(GlobalDynaBuf, '\0');
	//printf("[%d, %s]\n", section_now->local_scope, GlobalDynaBuf->buffer);
	set_label(section_now->local_scope, *statement_flags, NO_FORCE_BIT, POWER_NONE);
}
// Parse block, beginning with next byte.
// Processes one statement after the other until the end of the block or the
// end of the file is reached.
// End reason (either CHAR_EOB or CHAR_EOF) can be found in GotByte afterwards
// Has to be re-entrant.
void Parse_until_eob_or_eof(void)
{
// flags of the statement currently being parsed (SF_FOUND_BLANK is set here,
// SF_IMPLIED_LABEL by the label parsers)
bits statement_flags;
// // start with next byte, don't care about spaces
// NEXTANDSKIPSPACE();
// start with next byte
GetByte();
// loop until end of block or end of file
while ((GotByte != CHAR_EOB) && (GotByte != CHAR_EOF)) {
// process one statement
statement_flags = 0; // no "label = pc" definition yet
typesystem_force_address_statement(FALSE);
// Parse until end of statement. Only loops if statement
// contains implicit label definition (=pc) and something else; or
// if "!ifdef/ifndef" is true/false, or if "!addr" is used without block.
do {
// check for pseudo opcodes was moved out of switch,
// because prefix character is now configurable.
if (GotByte == config.pseudoop_prefix) {
pseudoopcode_parse();
} else {
switch (GotByte) {
case CHAR_EOS: // end of statement
// Ignore now, act later
// (stops from being "default")
break;
case ' ': // space
// remember the blank, set_label() uses this flag for its
// "not in leftmost column" warning
statement_flags |= SF_FOUND_BLANK;
/*FALLTHROUGH*/
case CHAR_SOL: // start of line
GetByte(); // skip
break;
case '-':
// anonymous backward label
parse_backward_anon_def(&statement_flags);
break;
case '+':
// either a macro call or an anonymous forward label,
// the byte after the '+' decides
GetByte();
if ((GotByte == LOCAL_PREFIX) // TODO - allow "cheap macros"?!
|| (BYTE_CONTINUES_KEYWORD(GotByte)))
Macro_parse_call();
else
parse_forward_anon_def(&statement_flags);
break;
case '*':
notreallypo_setpc(); // define program counter (fn is in pseudoopcodes.c)
break;
case LOCAL_PREFIX:
// local symbol, lives in the section's local scope
parse_local_symbol_def(&statement_flags, section_now->local_scope);
break;
case CHEAP_PREFIX:
// cheap local symbol, lives in the section's cheap scope
parse_local_symbol_def(&statement_flags, section_now->cheap_scope);
break;
default:
// anything that may start a keyword is a mnemonic or a
// global symbol definition, everything else is an error
if (BYTE_STARTS_KEYWORD(GotByte)) {
parse_mnemo_or_global_symbol_def(&statement_flags);
} else {
Throw_error(exception_syntax);
Input_skip_remainder();
}
}
}
} while (GotByte != CHAR_EOS); // until end-of-statement
vcpu_end_statement(); // adjust program counter
// go on with next byte
GetByte(); //NEXTANDSKIPSPACE();
}
}
// Skip space, then check for a block:
// If GotByte is CHAR_SOB ('{'), parse the block, step past its closing brace
// and return TRUE. A block that ends with end-of-file instead of '}' causes a
// serious error.
// Otherwise (if there is no block), return FALSE.
// Don't forget to call EnsureEOL() afterwards.
int Parse_optional_block(void)
{
	SKIPSPACE();
	if (GotByte == CHAR_SOB) {
		Parse_until_eob_or_eof();
		if (GotByte != CHAR_EOB)
			Throw_serious_error(exception_no_right_brace);
		GetByte();	// step past the closing brace
		return TRUE;
	}
	return FALSE;
}
// Error handling

// Number of messages (warnings, errors, serious errors) output so far.
// Exists so macro calls can find out whether to show a call stack.
static int	throw_counter = 0;

// Return the current value of the message counter.
int Throw_get_counter(void)
{
	return throw_counter;
}
// Do the actual output for warnings, errors and serious errors.
// Counts the message, then writes it to the configured message stream together
// with the current context: file name, line number, section type and section
// title. The layout depends on whether MSVC-style output was requested.
//	message		the text to show
//	type		the kind of message (e.g. "Warning"), shown as given
// TODO: make un-static so !info and !debug can use this.
static void throw_message(const char *message, const char *type)
{
	++throw_counter;
	if (!config.format_msvc) {
		// default layout
		fprintf(config.msg_stream, "%s - File %s, line %d (%s %s): %s\n",
			type, Input_now->original_filename, Input_now->line_number,
			section_now->type, section_now->title, message);
		return;
	}
	// layout for --msvc
	fprintf(config.msg_stream, "%s(%d) : %s (%s %s): %s\n",
		Input_now->original_filename, Input_now->line_number,
		type, section_now->type, section_now->title, message);
}
// Output a warning.
// A warning means the produced code looks as expected, but something happened
// that the user should be told about - for example ACME may have assembled a
// 16-bit parameter with an 8-bit value.
// If colored output is enabled, the word "Warning" is wrapped in ANSI escape
// sequences.
void Throw_warning(const char *message)
{
	PLATFORM_WARNING(message);
	throw_message(message, config.format_color ? "\033[33mWarning\033[0m" : "Warning");
}
// Output a warning, but only if in first pass; in all other passes this does
// nothing. See Throw_warning() for what a warning means.
void Throw_first_pass_warning(const char *message)
{
	if (!(FIRST_PASS))
		return;
	Throw_warning(message);
}
// Output an error.
// This means something went wrong in a way that implies that the output
// almost for sure won't look like expected, for example when there was a
// syntax error. The assembler will try to go on with the assembly though, so
// the user gets to know about more than one of his typos at a time.
void Throw_error(const char *message)
{
PLATFORM_ERROR(message);
if (config.format_color)
throw_message(message, "\033[31mError\033[0m");
else
throw_message(message, "Error");
++pass.error_count;
if (pass.error_count >= config.max_errors)
exit(ACME_finalize(EXIT_FAILURE));
}
// Output a serious error, stopping assembly.
// Serious errors are those that make it impossible to go on with the
// assembly. Example: "!fill" without a parameter - the program counter cannot
// be set correctly in this case, so proceeding would be of no use at all.
void Throw_serious_error(const char *message)
{
PLATFORM_SERIOUS(message);
if (config.format_color)
throw_message(message, "\033[1m\033[31mSerious error\033[0m");
else
throw_message(message, "Serious error");
// FIXME - exiting immediately inhibits output of macro call stack!
exit(ACME_finalize(EXIT_FAILURE));
}
// Handle bugs
void Bug_found(const char *message, int code)
{
Throw_warning("Bug in ACME, code follows");
fprintf(stderr, "(0x%x:)", code);
Throw_serious_error(message);
}
// Insert object: feed the value(s) of the given object to iter->fn.
//	numbers	are passed on as a single value (0 if undefined)
//	lists	are walked item by item, recursing into each payload
//	strings	are passed on character by character, each one encoded and then
//		XORed with iter->stringxor. Strings of two or more characters
//		are only accepted if iter->accept_long_strings is set.
// Any other object type (or number type) is reported as a bug.
void output_object(struct object *object, struct iter_context *iter)
{
	struct listitem	*node;
	char		*cursor;
	int		remaining;

	if (object->type == &type_number) {
		if (object->u.number.ntype == NUMTYPE_UNDEFINED) {
			iter->fn(0);
		} else if (object->u.number.ntype == NUMTYPE_INT) {
			iter->fn(object->u.number.val.intval);
		} else if (object->u.number.ntype == NUMTYPE_FLOAT) {
			iter->fn(object->u.number.val.fpval);
		} else {
			Bug_found("IllegalNumberType0", object->u.number.ntype);
		}
		return;
	}

	if (object->type == &type_list) {
		// the list head itself carries no payload, so start at its
		// successor and stop when arriving at the head again
		for (node = object->u.listhead->next; node != object->u.listhead; node = node->next)
			output_object(&node->u.payload, iter);
		return;
	}

	if (object->type != &type_string) {
		Bug_found("IllegalObjectType", 0);
		return;
	}

	cursor = object->u.string->payload;
	remaining = object->u.string->length;
	// single-char strings are accepted, to be more compatible with
	// versions before 0.97 (and empty strings are not really a problem...)
	if (!iter->accept_long_strings && (remaining >= 2)) {
		Throw_error("There's more than one character.");	// see alu.c for the original of this error
		return;
	}
	while (remaining--)
		iter->fn(iter->stringxor ^ encoding_encode_char(*(cursor++)));
}
// Output 8-bit value with range check.
// Both signed and unsigned interpretations are accepted (-0x80..0xff).
// A value outside of that range causes an error, but is output nevertheless.
void output_8(intval_t value)
{
	if (!((value >= -0x80) && (value <= 0xff)))
		Throw_error(exception_number_out_of_8b_range);
	Output_byte(value);
}
// Output 16-bit value with range check, big-endian (high byte first).
// Both signed and unsigned interpretations are accepted (-0x8000..0xffff).
// A value outside of that range causes an error, but is output nevertheless.
void output_be16(intval_t value)
{
	if (!((value >= -0x8000) && (value <= 0xffff)))
		Throw_error(exception_number_out_of_16b_range);
	Output_byte(value >> 8);	// high byte
	Output_byte(value);		// low byte
}
// Output 16-bit value with range check, little-endian (low byte first).
// Both signed and unsigned interpretations are accepted (-0x8000..0xffff).
// A value outside of that range causes an error, but is output nevertheless.
void output_le16(intval_t value)
{
	if (!((value >= -0x8000) && (value <= 0xffff)))
		Throw_error(exception_number_out_of_16b_range);
	Output_byte(value);		// low byte
	Output_byte(value >> 8);	// high byte
}
// Output 24-bit value with range check, big-endian (highest byte first).
// Both signed and unsigned interpretations are accepted
// (-0x800000..0xffffff).
// A value outside of that range causes an error, but is output nevertheless.
void output_be24(intval_t value)
{
	if (!((value >= -0x800000) && (value <= 0xffffff)))
		Throw_error(exception_number_out_of_24b_range);
	Output_byte(value >> 16);	// bits 16..23
	Output_byte(value >> 8);	// bits 8..15
	Output_byte(value);		// bits 0..7
}
// Output 24-bit value with range check, little-endian (lowest byte first).
// Both signed and unsigned interpretations are accepted
// (-0x800000..0xffffff).
// A value outside of that range causes an error, but is output nevertheless.
void output_le24(intval_t value)
{
	if (!((value >= -0x800000) && (value <= 0xffffff)))
		Throw_error(exception_number_out_of_24b_range);
	Output_byte(value);		// bits 0..7
	Output_byte(value >> 8);	// bits 8..15
	Output_byte(value >> 16);	// bits 16..23
}
// FIXME - the range checks below are commented out because 32-bit
// signed integers cannot exceed the range of 32-bit signed integers.
// But now that 64-bit machines are the norm, "intval_t" might be a
// 64-bit int. I need to address this problem one way or another.
// Output 32-bit value (without range check), big-endian (highest byte first).
// Exactly four bytes are output; if "intval_t" is wider than 32 bits, anything
// above bit 31 is not looked at.
void output_be32(intval_t value)
{
	// the range check is disabled:
//	if ((value < -0x80000000) || (value > 0xffffffff))
//		Throw_error(exception_number_out_of_32b_range);
	// NOTE(review): should the check ever be re-enabled, "-0x80000000" needs
	// a wider literal - where int has 32 bits, 0x80000000 is unsigned and
	// negating it does not give a negative number.
	Output_byte(value >> 24);	// bits 24..31
	Output_byte(value >> 16);	// bits 16..23
	Output_byte(value >> 8);	// bits 8..15
	Output_byte(value);		// bits 0..7
}
// Output 32-bit value (without range check), little-endian (lowest byte
// first).
// Exactly four bytes are output; if "intval_t" is wider than 32 bits, anything
// above bit 31 is not looked at.
void output_le32(intval_t value)
{
	// the range check is disabled:
//	if ((value < -0x80000000) || (value > 0xffffffff))
//		Throw_error(exception_number_out_of_32b_range);
	// NOTE(review): should the check ever be re-enabled, "-0x80000000" needs
	// a wider literal - where int has 32 bits, 0x80000000 is unsigned and
	// negating it does not give a negative number.
	Output_byte(value);		// bits 0..7
	Output_byte(value >> 8);	// bits 8..15
	Output_byte(value >> 16);	// bits 16..23
	Output_byte(value >> 24);	// bits 24..31
}