syn68k/syngen/parse.c
2008-09-26 08:25:10 -06:00

641 lines
17 KiB
C

/*
* parse.c
*/
#include "error.h"
#include "parse.h"
#include "reduce.h"
#include "macro.h"
#include "list.h"
#include "defopcode.h"
#include "bitstring.h"
#include "safe_alloca.h"
#include "uniquestring.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#include <errno.h>
#ifdef NeXT
#include <bsd/libc.h>
#endif
static List *parse_expression (void);
static int parse_define (SymbolTable *sym, List *new);
static int parse_defopcode (SymbolTable *sym, List *new);
static void verify_fields_exist (const char *bits, List *code);
/* Drains the tokenizer, handling every top-level expression it yields.
 * Each `define' is recorded as a macro in SYM and each `defopcode' has
 * code generated for it; anything else is wrapped in a temporary list and
 * run through reduce() first, then dispatched on whatever it became.
 * Returns the sum of the values returned by parse_define and
 * parse_defopcode, i.e. the number of macros plus opcodes accepted.
 */
int
parse_all_expressions (SymbolTable *sym)
{
  int count = 0;
  List opener;			/* Only opener.token is ever used. */

  while (fetch_next_token (&opener.token))
    {
      List *expr;

      /* Every top-level form has to start with an open paren. */
      if (opener.token.type != TOK_LEFT_PAREN)
	fatal_input_error ("Expecting '(' but not finding one; aborting.\n");

      /* Pull in everything up to the matching close paren. */
      expr = parse_expression ();
      if (expr == NULL)
	{
	  input_error ("stray '(' with no following expression!");
	  continue;
	}

      /* Something other than define/defopcode: let macro reduction have a
       * go at it.  reduce() wants an enclosing list node, so fake one on
       * the stack and pick up whatever ends up as its car.
       */
      if (!(expr->token.type == TOK_DEFINE
	    || expr->token.type == TOK_DEFOPCODE))
	{
	  List wrapper = { NULL, 0 /* expr */, { /* expr->token */0 } };

	  wrapper.car = expr;
	  wrapper.token = expr->token;
	  wrapper.token.type = TOK_LIST;
	  reduce (&wrapper, sym, 0);
	  expr = wrapper.car;
	  if (expr == NULL)
	    continue;
	}

      switch (expr->token.type) {
      case TOK_DEFINE:
	count += parse_define (sym, expr->cdr);
	break;
      case TOK_DEFOPCODE:
	count += parse_defopcode (sym, expr->cdr);
	/* In preprocess-only mode echo the expression back out. */
	if (preprocess_only)
	  {
	    fputs ("\n(", stdout);
	    print_list (stdout, expr);
	    puts (")");
	  }
	break;
      default:
	break;
      }
    }

  return count;
}
/* Records one macro definition in SYM.  NEW is the list that follows the
 * `define' keyword: either a list whose first element is the macro name,
 * as in (define (foo x y) ...), or the name itself, as in (define foo x).
 * Several definitions may share a name; later ones are appended to the
 * chain hanging off the existing symbol table entry.
 * Returns 1 if a macro was recorded, 0 if the definition was rejected.
 */
static int
parse_define (SymbolTable *sym, List *new)
{
  const char *name = NULL;
  Macro *macro;
  SymbolInfo symbol_info;

  /* A bare "(define)" reaches us as a NULL list.  There is no token to
   * hang a parse_error on, so report it through input_error instead.
   */
  if (new == NULL)
    {
      input_error ("Invalid macro name. Ignoring...\n");
      return 0;
    }

  /* Get the name of the macro for (define (foo x y)) or (define foo x). */
  switch (new->token.type) {
  case TOK_LIST:
    if (new->car != NULL && new->car->token.type == TOK_IDENTIFIER)
      name = new->car->token.u.string;
    break;
  case TOK_QUOTED_STRING:
  case TOK_IDENTIFIER:
    name = new->token.u.string;
    break;
  default:
    break;
  }

  /* Make sure the macro name is legitimate. */
  if (name == NULL)
    {
      parse_error (new, "Invalid macro name. Ignoring...\n");
      return 0;
    }

  /* Create a new macro definition. */
  macro = (Macro *) malloc (sizeof *macro);
  if (macro == NULL)
    {
      fatal_error ("parse_define: Out of memory!\n");
      return 0;
    }
  macro->expr = new;
  macro->next = NULL;

  /* If there is already a macro of this name in the table, append this
   * one to the end of the list of macros with this name.
   */
  if (lookup_symbol (sym, name, &symbol_info, NULL) == HASH_NOERR)
    {
      Macro *tail = (Macro *) symbol_info.p;

      while (tail->next != NULL)
	tail = tail->next;
      tail->next = macro;
    }
  else  /* Add this macro's definition to the hash table. */
    {
      symbol_info.p = macro;
      insert_symbol (sym, name, symbol_info);
    }

  return 1;
}
/* Handles one defopcode expression.  NEW is the list that follows the
 * `defopcode' keyword; its first element must be the opcode name (an
 * identifier or quoted string).  The whole expression is first run through
 * reduce() for macro substitution.  Unless preprocess_only is set, the
 * opcode description and every cc variant are then decoded and
 * generate_opcode is called once per bits_to_expand index.
 * Returns 1 if the opcode was accepted, 0 if it was rejected outright
 * (bad name, wrong number of description arguments, too many variants).
 */
static int
parse_defopcode (SymbolTable *sym, List *new)
{
#if !defined(__GNUC__)		/* It's not clear which .h file I should */
  extern void *alloca(int);	/* pull in to get this declaration */
#endif
  const char *name = NULL;
  List dummy1 = { /* new */ 0, NULL, { /* new->token */ 0 } };
  List dummy2 = { NULL, /* &dummy1 */ 0, { /* new->token */ 0 } };
  SAFE_DECL ();

  /* A bare "(defopcode)" reaches us as a NULL list.  There is no token to
   * hang a parse_error on, so report it through input_error instead.
   */
  if (new == NULL)
    {
      input_error ("Invalid opcode name. Ignoring...\n");
      return 0;
    }

  /* Rebuild the enclosing "(defopcode ...)" list on the stack so that
   * reduce() can be handed the complete expression: dummy2 is the outer
   * list node and dummy1 stands in for the `defopcode' keyword.
   */
  dummy1.cdr = new;
  dummy1.token = new->token;
  dummy2.car = &dummy1;
  dummy2.token = new->token;
  dummy1.token.type = TOK_DEFOPCODE;
  dummy1.token.u.string = "defopcode";
  dummy2.token.type = TOK_LIST;
  dummy2.token.u.string = NULL;

  if (new->token.type == TOK_QUOTED_STRING
      || new->token.type == TOK_IDENTIFIER)
    name = new->token.u.string;

  /* Make sure the opcode name is legitimate. */
  if (name == NULL)
    {
      parse_error (new, "Invalid opcode name. Ignoring...\n");
      return 0;
    }

  /* Perform initial macro substitutions. */
  reduce (&dummy2, sym, 0);

  /* Generate the opcode. */
  if (!preprocess_only)
    {
      /* Longest bit pattern string we can expand; buffers holding one are
       * this long plus one byte for the terminating 0.
       */
      enum { BTE_PATTERN_LEN = 16 };
      ParsedOpcodeInfo info;
      List *l = CDR (CADDAR (&dummy2)), *err;
      CCVariant *var;
      int num_variants;
      List *code_backup[MAX_VARIANTS];
      const char *bits_to_expand[MAX_VARIANTS][MAX_VARIANTS];
      int num_bits_to_expand[MAX_VARIANTS];
      int max_num_bits_to_expand = 0;
      int i;

      /* Everything after the keyword, the name and the description list
       * is a cc variant.
       */
      num_variants = list_length (&dummy1) - 3;

      if (list_length (l) != 4)
	{
	  parse_error (l, "Missing arguments; should be 4, found %d\n",
		       list_length (l));
	  return 0;
	}
      if (num_variants > MAX_VARIANTS)
	{
	  parse_error (l, "Error: too many variants. Only %d allowed. To "
		       "raise, change MAX_VARIANTS in defopcode.h.\n",
		       MAX_VARIANTS);
	  return 0;
	}

      /* Parse in the instruction information: the four description
       * arguments are the cpu, the addressing mode expression, the misc
       * flags list and the list of opcode bit strings.
       */
      memset (&info, 0, sizeof info);
      info.name = CDAR (&dummy2)->token.u.string;
      info.cpu = l->token.u.n;
      info.amode = CDR (l);
      info.misc_flags = CDDR (l);
      if (info.misc_flags->token.type != TOK_LIST)
	parse_error (info.misc_flags, "Expecting a list of misc flags!\n");
      else
	{
	  List *ls;

	  for (ls = info.misc_flags->car; ls != NULL; ls = ls->cdr)
	    {
	      char buf[256];

	      switch (ls->token.type) {
	      case TOK_ENDS_BLOCK:
		info.ends_block = TRUE;
		break;
	      case TOK_DONT_POSTINCDEC_UNEXPANDED:
		info.dont_postincdec_unexpanded = TRUE;
		break;
	      case TOK_NEXT_BLOCK_DYNAMIC:
		info.next_block_dynamic = TRUE;
		break;
	      case TOK_SKIP_TWO_OPERAND_WORDS:
		info.operand_words_to_skip = 2;
		break;
	      case TOK_SKIP_FOUR_OPERAND_WORDS:
		info.operand_words_to_skip = 4;
		break;
	      case TOK_SKIP_ONE_POINTER:
		info.operand_words_to_skip = PTR_WORDS;
		break;
	      case TOK_SKIP_TWO_POINTERS:
		info.operand_words_to_skip = PTR_WORDS * 2;
		break;
	      default:
		parse_error (ls, "Unknown misc flag: %s",
			     unparse_token (&ls->token, buf));
		break;
	      }
	    }
	}

      /* Concatenate the opcode bit strings.  Individual strings are not
       * required to be 16 characters long (that check was judged too
       * harsh); only the total length is validated below.
       */
      info.opcode_bits[0] = '\0';
      err = CDDDR (l);  /* For later, in case we need it. */
      if (CDDDR (l)->token.type != TOK_LIST)
	{
	  parse_error (CDDDR (l), "Expecting explicit list of opcode bits.\n");
	  strcpy (info.opcode_bits, "0000000000000000");
	}
      else
	for (l = CDR (CADDDR (l)); l != NULL; l = l->cdr)
	  {
	    /* Never write past the end of info.opcode_bits.
	     * NOTE(review): relies on opcode_bits being a char array member
	     * of ParsedOpcodeInfo (it is written through immediately after
	     * info has been zeroed) -- confirm in defopcode.h.
	     */
	    if (strlen (info.opcode_bits) + strlen (l->token.u.string)
		>= sizeof info.opcode_bits)
	      {
		parse_error (l, "Too many opcode bits specified.\n");
		break;
	      }
	    strcat (info.opcode_bits, l->token.u.string);
	  }
      if (strlen (info.opcode_bits) % 16)
	{
	  parse_error (err, "Not a multiple of 16 opcode bits.\n");
	  strcpy (info.opcode_bits, "0000000000000000");
	}

      /* Parse the information for the cc variants. */
      var = (CCVariant *) SAFE_alloca (num_variants * sizeof (CCVariant));
      memset (var, 0, num_variants * sizeof (CCVariant));
      info.cc_variant = var;

      /* Loop through and parse each cc variant. */
      for (i = num_variants - 1, l = CDDDR (&dummy1); i >= 0; i--, l = l->cdr)
	{
	  static const struct {
	    char character;
	    unsigned char cc_may_set:1;
	    unsigned char cc_may_not_set:1;
	    unsigned char cc_to_known_value:1;
	    unsigned char cc_known_values:1;
	  } cc_bit_info[] = {
	    { '0', 1, 0, 1, 0 },  /* 0 -> always force cc bit to 0.     */
	    { '1', 1, 0, 1, 1 },  /* 1 -> always force cc bit to 1.     */
	    { 'C', 1, 0, 0, 0 },  /* Any letter means will always set   */
	    { 'N', 1, 0, 0, 0 },  /* cc bit to either 0 or 1, but       */
	    { 'V', 1, 0, 0, 0 },  /* we can't determine which at        */
	    { 'X', 1, 0, 0, 0 },  /* compile time.                      */
	    { 'Z', 1, 0, 0, 0 },  /*                                    */
	    { '-', 0, 1, 0, 0 },  /* - -> cc bit always unchanged.      */
	    { '>', 1, 1, 0, 0 },  /* > -> cc set to 1 or unchanged.     */
	    { '<', 1, 1, 0, 0 },  /* < -> cc set to 0 or unchanged.     */
	    { '?', 1, 0, 0, 0 },  /* ? -> cc bit undefined.             */
	    { '%', 1, 1, 0, 0 },  /* % -> cc might change, might not.   */
	  };
	  int guess, bit;
	  const char *b;
	  List *bte;
	  List *native;

	  /* Insert them in reverse order: the list is walked front to back
	   * while i counts down, and each slot links to the one after it.
	   */
	  if (i > 0)
	    var[i - 1].next = &var[i];

	  /* Parse the cc bits set specs.  The rightmost character of the
	   * string corresponds to bit 0.
	   */
	  b = (CDAR (l))->token.u.string;
	  if (strlen (b) != 5)
	    {
	      /* Blame the cc-set string itself, not the cc-needed string
	       * that follows it.
	       */
	      parse_error (CDAR (l), "Need exactly five characters in "
			   "cc bits specification.\n");
	      b = "-----";
	    }
	  for (bit = 0; bit < 5; bit++)
	    {
	      for (guess = sizeof cc_bit_info / sizeof cc_bit_info[0] - 1;
		   guess >= 0; guess--)
		if (cc_bit_info[guess].character == b[4 - bit])
		  {
		    var[i].cc_may_set |= cc_bit_info[guess].cc_may_set << bit;
		    var[i].cc_may_not_set
		      |= cc_bit_info[guess].cc_may_not_set << bit;
		    var[i].cc_to_known_value
		      |= cc_bit_info[guess].cc_to_known_value << bit;
		    var[i].cc_known_values
		      |= cc_bit_info[guess].cc_known_values << bit;
		    break;
		  }
	      if (guess < 0)
		parse_error (CDAR (l), "Unknown character '%c' in cc bit "
			     "specification.\n", b[4 - bit]);
	    }

	  /* Parse the cc bits needed specs. */
	  b = (CDDAR (l))->token.u.string;
	  if (strlen (b) != 5)
	    {
	      parse_error (CDDAR (l), "Need exactly five characters in "
			   "cc bits specification.\n");
	      b = "-----";
	    }
	  for (bit = 0; bit < 5; bit++)
	    {
	      if (strchr ("-CNVXZ", b[4 - bit]) == NULL)
		parse_error (CDDAR (l), "Illegal character '%c' in cc bits "
			     "needed specification. Must be one of "
			     "-,C,N,V,X,Z.\n",
			     b[4 - bit]);
	      var[i].cc_needed |= (b[4 - bit] != '-') << bit;
	    }

	  /* Fetch all of the bits_to_expand strings into an array.
	   * Terminate the list provided by the user with "----------------"
	   * so we guarantee that all legal bit patterns get code generated
	   * for them.
	   */
	  num_bits_to_expand[i] = 0;
	  bte = CADDR (CDAR (l));
	  if (bte->token.type != TOK_EXPLICIT_LIST)
	    parse_error (bte, "Expecting a list of bit pattern strings.\n");
	  else
	    {
	      for (bte = bte->cdr; bte != NULL; bte = bte->cdr)
		{
		  /* Keep one slot free for the terminator added below. */
		  if (num_bits_to_expand[i] >= MAX_VARIANTS - 1)
		    {
		      parse_error (bte, "Too many bit pattern strings; "
				   "only %d allowed.\n", MAX_VARIANTS - 1);
		      break;
		    }
		  /* Patterns are later copied into fixed-size buffers, so
		   * anything longer than those can hold must be dropped.
		   */
		  if (strlen (bte->token.u.string) > BTE_PATTERN_LEN)
		    {
		      parse_error (bte, "Bit pattern string is longer than "
				   "%d characters.\n", (int) BTE_PATTERN_LEN);
		      continue;
		    }
		  bits_to_expand[i][num_bits_to_expand[i]++]
		    = bte->token.u.string;
		}
	    }
	  bits_to_expand[i][num_bits_to_expand[i]++]
	    = "----------------";
	  if (num_bits_to_expand[i] > max_num_bits_to_expand)
	    max_num_bits_to_expand = num_bits_to_expand[i];

	  /* Whatever follows the bit pattern list is the variant's code,
	   * optionally preceded by a native code specifier.
	   */
	  native = CDDR (CDDAR (l));
	  if (native != NULL && CAR (native) != NULL
	      && (CAR (native))->token.type == TOK_NATIVE_CODE)
	    {
	      if (CDAR (native) == NULL
		  || (CDAR (native))->token.type != TOK_QUOTED_STRING)
		parse_error (CAR (native), "Faulty native code specifier.");
#ifdef GENERATE_NATIVE_CODE
	      {
		char buf[1024];
		List *xx;

		/* Glue all of the specifier's strings together. */
		buf[0] = '\0';
		for (xx = CDAR (native); xx != NULL; xx = xx->cdr)
		  {
		    if (xx->token.type != TOK_QUOTED_STRING)
		      parse_error (xx, "Expected string here.");
		    else if (strlen (buf) + strlen (xx->token.u.string)
			     >= sizeof buf)
		      {
			parse_error (xx, "Native code specifier too long.");
			break;
		      }
		    else
		      strcat (buf, xx->token.u.string);
		  }
		if (buf[0] == '\0' || !strcmp (buf, "none"))
		  var[i].native_code_info = NULL;
		else
		  var[i].native_code_info = unique_string (buf);
	      }
#endif
	      /* The real code starts after the native code specifier. */
	      var[i].code = native->cdr;
	    }
	  else
	    {
#ifdef GENERATE_NATIVE_CODE
	      var[i].native_code_info = NULL;
#endif
	      var[i].code = native;
	    }
#ifndef GENERATE_NATIVE_CODE
	  var[i].native_code_info = NULL;
#endif
	}

      /* Verify that all of the fields they specified actually exist. */
      for (i = 0; i < num_variants; i++)
	verify_fields_exist (info.opcode_bits, var[i].code);

      /* Loop through and generate code for each bits_to_expand specified.
       * This is not elegant; originally, only one bits_to_expand string
       * could be specified, and its semantics were more limited than the
       * new, more powerful versions.  To compensate, we will call the
       * lower level routines once for each bits_to_expand string, and
       * modify the legal addressing mode expression to provide us with the
       * new literal bits matching capability of the new bits_to_expand
       * strings.
       */
      for (i = 0; i < num_variants; i++)
	{
	  /* NOTE(review): these buffers are never freed here; whether
	   * generate_opcode keeps the pointers is not visible from this
	   * file -- confirm before adding a free().
	   */
	  var[i].bits_to_expand = (char *) malloc (BTE_PATTERN_LEN + 1);
	  if (var[i].bits_to_expand == NULL)
	    {
	      fatal_error ("parse_defopcode: Out of memory!\n");
	      return 0;
	    }
	  code_backup[i] = var[i].code;
	}

      for (i = 0; i < max_num_bits_to_expand; i++)
	{
	  char intersect[MAX_VARIANTS][BTE_PATTERN_LEN + 1];
	  List *saved_amode = info.amode;
	  List *old_cdr = info.amode->cdr;
	  int j;

	  /* Detach the addressing mode node from what follows it for the
	   * duration of this pass; it is reattached at the bottom.
	   */
	  info.amode->cdr = NULL;

	  /* Preserve code: every pass works on a fresh copy. */
	  for (j = 0; j < num_variants; j++)
	    var[j].code = copy_list (code_backup[j]);

	  /* Grab all of the bits_to_expand strings.  Variants with fewer
	   * strings than this pass's index fall back to all dashes.
	   */
	  for (j = 0; j < num_variants; j++)
	    {
	      char *p;

	      strcpy (var[j].bits_to_expand, ((i < num_bits_to_expand[j])
					      ? bits_to_expand[j][i]
					      : "----------------"));

	      /* Generate the intersection string. */
	      strcpy (intersect[j], var[j].bits_to_expand);
	      for (p = intersect[j]; *p != '\0'; p++)
		if (*p == '-')
		  *p = 'x';

	      /* Expand all literal bits by replacing them with 'x'.*/
	      for (p = var[j].bits_to_expand; *p != '\0'; p++)
		if (*p == '0' || *p == '1')
		  *p = 'x';
	    }

	  /* Intersect with all of the literal bits we know about, by
	   * wrapping the addressing mode in (intersect "pattern" amode).
	   */
	  for (j = 0; j < num_variants; j++)
	    if (strcmp (intersect[j], "xxxxxxxxxxxxxxxx"))
	      {
		List *isect = alloc_list ();

		isect->token.type = TOK_LIST;
		isect->car = alloc_list ();
		isect->car->token.type = TOK_INTERSECT;
		isect->car->token.u.string = "intersect";
		isect->car->cdr = alloc_list ();
		isect->car->cdr->token.type = TOK_QUOTED_STRING;
		isect->car->cdr->token.u.string = intersect[j];
		isect->car->cdr->cdr = info.amode;
		info.amode = isect;
	      }

	  /* Generate the actual code. */
	  generate_opcode (&info, sym);

	  /* Memory leak here, but who cares? */
	  info.amode = saved_amode;
	  info.amode->cdr = old_cdr;
	}

      ASSERT_SAFE(var);
    }

  return 1;
}
/* Walks the code tree rooted at CODE and reports, via parse_error, every
 * dollar token whose field number pattern_range cannot find in the opcode
 * bit string BITS.  Each node is checked first, then its car subtree, then
 * the rest of its cdr chain.
 */
static void
verify_fields_exist (const char *bits, List *code)
{
  List *node;

  for (node = code; node != NULL; node = node->cdr)
    {
      PatternRange range;

      if (IS_DOLLAR_TOKEN (node->token.type)
	  && !pattern_range (bits, node->token.u.dollarinfo.which, &range))
	parse_error (node, "Unknown field number %d; there are not that many "
		     "fields.\n", node->token.u.dollarinfo.which);

      /* Sublists are handled recursively; siblings by the loop. */
      verify_fields_exist (bits, node->car);
    }
}
/* Parses the single parenthesized expression held in STRING and returns it
 * wrapped in a freshly allocated TOK_LIST node attributed to
 * "<command line>", line 1.  Returns NULL if the text does not start with
 * an open paren.
 *
 * The tokenizer is driven through open_file, which takes a file name, so
 * the string is first written to a temporary file.  (tmpfile() was tried
 * by the original author and found to fail under Linux.)
 *
 * INCLUDE_DIRS is accepted for interface compatibility but is not used;
 * the temporary file is opened with a NULL include list.
 *
 * Not reentrant: a static flag asserts that no call is already active.
 */
List *
string_to_list (const char *string, const char *include_dirs[])
{
  FILE *temp;
  List *enclosing_list = NULL;
  Token t;
  char buf[256];
  char filename[32] = "/tmp/syngenXXXXXX";
  static int busy_p = FALSE;
  int fd;
  int write_failed;

  (void) include_dirs;

  assert (!busy_p);
  busy_p = TRUE;

  /* Create the temp file.  This must not live inside assert(): with NDEBUG
   * defined the call would vanish and fd would be used uninitialized.
   */
  fd = mkstemp (filename);
  temp = (fd >= 0) ? fdopen (fd, "w") : NULL;
  if (temp == NULL)
    {
      int saved_errno = errno;	/* close/remove may clobber errno. */

      if (fd >= 0)
	{
	  close (fd);
	  remove (filename);
	}
      fatal_error ("string_to_list: Unable to create temp file! %s\n",
		   strerror (saved_errno));
      busy_p = FALSE;
      return NULL;
    }

  /* Write the expression out; fclose is what actually flushes it. */
  write_failed = (fputs (string, temp) == EOF);
  if (fclose (temp) == EOF)
    write_failed = TRUE;
  if (write_failed)
    {
      remove (filename);
      fatal_error ("string_to_list: Unable to write temp file!\n");
      busy_p = FALSE;
      return NULL;
    }

  /* Make the temp file the tokenizer's current input. */
  open_file (filename, NULL);
  temp = current_stream ();

  if (!fetch_next_token (&t))
    fatal_error ("Empty expression specified.\n");
  else if (t.type != TOK_LEFT_PAREN)
    input_error ("Expression must begin with open paren, not \"%s\"\n",
		 unparse_token (&t, buf));
  else
    {
      enclosing_list = alloc_list ();
      enclosing_list->car = parse_expression ();
      enclosing_list->token.type = TOK_LIST;
      enclosing_list->token.u.string = "[LIST]";
      enclosing_list->token.lineno = 1;
      enclosing_list->token.filename = "<command line>";
    }

  /* Common cleanup, reached on success and on a rejected expression alike
   * so that the stream, the temp file and busy_p never outlive the call.
   * The parser closes the file by itself when it runs out of tokens; only
   * close it here if it is still the current stream.
   */
  if (current_stream () == temp)
    {
      close_file ();
      assert (current_stream () != temp);
    }
  remove (filename);
  busy_p = FALSE;

  return enclosing_list;
}
/* Reads tokens up to the ')' that closes the current list and returns the
 * elements read as a cdr-linked chain (NULL for an empty list).  The
 * caller must already have consumed the opening '('.  Each nested '('
 * becomes a TOK_LIST node whose car holds the recursively parsed sublist.
 * Running out of tokens before the closing ')' is a fatal error.
 */
static List *
parse_expression ()
{
  List *head = NULL;
  List **tail = &head;		/* Where the next node gets linked in. */
  Token tok;

  for (;;)
    {
      List *node;

      if (!fetch_next_token (&tok))
	fatal_error ("Premature EOF.\n");

      if (tok.type == TOK_RIGHT_PAREN)
	break;

      /* Create a new node and append it to the chain built so far. */
      node = alloc_list ();
      *tail = node;
      node->token = tok;
      tail = &node->cdr;

      /* A nested list: descend and hang the result off this node. */
      if (tok.type == TOK_LEFT_PAREN)
	{
	  node->token.type = TOK_LIST;
	  node->car = parse_expression ();
	}
    }

  return head;
}