diff --git a/xa/Makefile b/xa/Makefile index 7691165..ce8aab1 100644 --- a/xa/Makefile +++ b/xa/Makefile @@ -1,7 +1,7 @@ # Unix gcc or DOS go32 cross-compiling gcc # VERS = 2.4.1 -CC = gcc +CC = gcc -Wall LD = gcc # for testing. not to be used; build failures in misc/. #CFLAGS = -O2 -W -Wall -pedantic -ansi -g diff --git a/xa/src/xa.c b/xa/src/xa.c index 3dc5daa..9f5b2dc 100644 --- a/xa/src/xa.c +++ b/xa/src/xa.c @@ -29,13 +29,13 @@ #include #endif -/* macros */ -#include "xad.h" - /* structs and defs */ #include "xah.h" #include "xah2.h" +/* macros */ +#include "xad.h" + /* exported functions are defined here */ #include "xa.h" #include "xal.h" @@ -136,10 +136,6 @@ int main(int argc, char *argv[]) { char *lfile; /* labels go here */ char *ifile; - char old_e[MAXLINE]; - char old_l[MAXLINE]; - char old_o[MAXLINE]; - tim1 = time(NULL); // note: unfortunately we do no full distinction between 65C02 and 65816. @@ -876,6 +872,7 @@ static int pass2(void) { static int pass1(void) { signed char o[2 * MAXLINE]; /* doubled for token listing */ + char s[MAXLINE]; int l, er, al; memode = 0; @@ -1135,13 +1132,14 @@ static int puttmps(signed char *s, int l) { static char l[MAXLINE]; -static int xa_getline(char *s) { +static int xa_getline(char *out_line) { static int ec; static int i, c; - int hkfl, j, comcom; + int hkfl = 0; + int wr_cnt = 0; + int comcom = 0; - j = hkfl = comcom = 0; ec = E_OK; if (!gl) { @@ -1174,7 +1172,7 @@ static int xa_getline(char *s) { if (!ec || ec == E_EOF) { int startofline = 1; do { - c = s[j] = l[i++]; + c = out_line[wr_cnt] = l[i++]; if (!(hkfl & 2) && c == '\"') hkfl ^= 1; @@ -1209,14 +1207,14 @@ static int xa_getline(char *s) { if (!isspace(c)) { startofline = 0; } - j++; - } while (c != '\0' && j < MAXLINE - 1 && i < MAXLINE - 1); + wr_cnt++; + } while (c != '\0' && wr_cnt < MAXLINE - 1 && i < MAXLINE - 1); - s[j] = '\0'; + out_line[wr_cnt] = '\0'; } else - s[0] = '\0'; + out_line[0] = '\0'; #if 0 - printf("got line: %s\n", s); + printf("got 
line: %s\n", out_line); #endif return (ec); } diff --git a/xa/src/xad.h b/xa/src/xad.h index 244241f..95080ed 100644 --- a/xa/src/xad.h +++ b/xa/src/xad.h @@ -1,6 +1,6 @@ /* xa65 - 65xx/65816 cross-assembler and utility suite * - * Copyright (C) 1989-1997 André Fachat (a.fachat@physik.tu-chemnitz.de) + * Copyright (C) 1989-1997 André Fachat (a.fachat@physik.tu-chemnitz.de) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -31,6 +31,8 @@ #define cval(s) 256 * ((s)[1] & 255) + ((s)[0]&255) #define lval(s) 65536 * ((s)[2] & 255) + 256 * ((s)[1] & 255) + ((s)[0] & 255) + +/* deprecated. should be replaced with the following inline function */ #define wval(i, v, f) do { \ t[i++] = T_VALUE; \ t[i++] = v & 255; \ @@ -39,6 +41,38 @@ t[i++] = f & 255; \ } while (0) +#define wvalo(i, v, f) do { \ + out[i++] = T_VALUE; \ + out[i++] = v & 255; \ + out[i++] = (v >> 8) & 255; \ + out[i++] = (v >> 16) & 255; \ + out[i++] = f & 255; \ + } while (0) + +/* + * Writes a token for an integer value (24 bits), and a flag into an output buffer + * (typically the tokenizer output stream) + * + * returns number of bytes written + * + * Flag could be for example: + * 'd' - to be printed as decimal + * '$' - to be printed as hex + * '&' - to be printed as octal + * '%' - to be printed as binary + * ''' - to be printed as char with single quote delimiter + * '"' - to be printed as char with double quote delimiter + */ +static inline int write_val(signed char *out_token, int value, char flag) { + int l = 0; + out_token[l++] = T_VALUE; + out_token[l++] = value & 255; + out_token[l++] = (value >> 8) & 255; + out_token[l++] = (value >> 16) & 255; + out_token[l++] = flag & 255; + return l; +} + #define wval_len 5 /* number of bytes stored in wval() call */ #endif /* __XA65_XAD_H__ */ diff --git a/xa/src/xal.c b/xa/src/xal.c index fbed9ae..64b50bc 100644 --- a/xa/src/xal.c +++ b/xa/src/xal.c @@ -26,8 
+26,8 @@ /* structs and defs */ -#include "xad.h" #include "xah.h" +#include "xad.h" #include "xar.h" #include "xah2.h" #include "xap.h" diff --git a/xa/src/xalisting.c b/xa/src/xalisting.c index cfc9443..5269529 100644 --- a/xa/src/xalisting.c +++ b/xa/src/xalisting.c @@ -439,8 +439,8 @@ int list_tokens(char *buf, signed char *input, int len) { if (tmp >= 0 && tmp < number_of_valid_tokens) { /* assembler keyword */ /*printf("tmp=%d, kt[tmp]=%p\n", tmp, kt[tmp]);*/ - if (kt[tmp] != NULL) { - outp += list_string(buf + outp, kt[tmp]); + if (keyword_table[tmp] != NULL) { + outp += list_string(buf + outp, keyword_table[tmp]); } outp += list_sp(buf + outp); inp += 1; diff --git a/xa/src/xap.c b/xa/src/xap.c index 1324011..94dc8f1 100644 --- a/xa/src/xap.c +++ b/xa/src/xap.c @@ -28,9 +28,9 @@ #include #endif -#include "xad.h" #include "xah.h" #include "xah2.h" +#include "xad.h" #include "xar.h" #include "xa.h" diff --git a/xa/src/xap.h b/xa/src/xap.h index 79da7a4..d0c80be 100644 --- a/xa/src/xap.h +++ b/xa/src/xap.h @@ -1,6 +1,6 @@ /* xa65 - 65xx/65816 cross-assembler and utility suite * - * Copyright (C) 1989-1997 André Fachat (a.fachat@physik.tu-chemnitz.de) + * Copyright (C) 1989-1997 André Fachat (a.fachat@physik.tu-chemnitz.de) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -39,6 +39,6 @@ long gm_ppm(void); long ga_ppm(void); extern Datei *filep; -extern char s[MAXLINE]; +//extern char s[MAXLINE]; #endif /* __XA65_XAP_H__ */ diff --git a/xa/src/xar.c b/xa/src/xar.c index 7ae6227..988f46f 100644 --- a/xa/src/xar.c +++ b/xa/src/xar.c @@ -22,8 +22,9 @@ #include #include -#include "xad.h" #include "xah.h" +#include "xad.h" + #include "xar.h" #include "xa.h" #include "xal.h" diff --git a/xa/src/xat.c b/xa/src/xat.c index 5987786..c0698b3 100644 --- a/xa/src/xat.c +++ b/xa/src/xat.c @@ -32,9 +32,9 @@ #include #include -#include "xad.h" #include "xah.h" #include "xah2.h" 
+#include "xad.h" #include "xar.h" #include "xa.h" @@ -60,11 +60,22 @@ static int t_p2(signed char *t, int *ll, int fl, int *al); void list_setbytes(int number_of_bytes_per_line); -/* assembly mnemonics and pseudo-op tokens */ +/********************************************************************/ + +/* table of keywords. + * + * A keyword is either a cpu assembly mnemonic, or a pseudo-opcode + * + * The index in this table matches with the keyword index value + * in the parser output, for which constants are defined below + * as K* values. + */ + /* ina and dea don't work yet */ + /* Note AF 20110624: added some ca65 compatibility pseudo opcodes, * many are still missing (and will most likely never by supported in this - * code base). Potential candidates are .hibytes, .lobytes, .asciiz, + * code base). Potential candidates for the future are .hibytes, .lobytes, .asciiz, * .addr, .charmap, .dbyt, .faraddr, .bankbytes, .segment (at least for the known ones) * .incbin is similar to our .bin, but with parameters reversed (argh...) * I like the .popseg/.pushseg pair; @@ -72,28 +83,25 @@ void list_setbytes(int number_of_bytes_per_line); * .export/.exportzp could be implemented with a commandline switch to NOT export * global labels, where .exported labels would still be exported in an o65 file. 
*/ -char *kt[] ={ +char *keyword_table[] ={ + + /* cpu mnemonics */ + /* 1 2 3 4 5 6 7 8 9 10 */ "adc","and","asl","bbr","bbs","bcc","bcs","beq","bit","bmi", "bne","bpl","bra","brk","bvc","bvs","brl","clc","cld","cli", -/* - "clv","cmp","cpx","cpy","cop","dea","dec","dex","dey","eor", -*/ "clv","cmp","cpx","cpy","cop",/*"dea",*/"dec","dex","dey","eor", - -/* - "ina","inc","inx","iny","jmp","jsr","lda","ldx","ldy","lsr", -*/ /*"ina",*/"inc","inx","iny","jmp","jsr","lda","ldx","ldy","lsr", "mvp","mvn","nop","ora","pha","php","phx","phy","pla","plp", "plx","ply","phb","phd","phk","plb","pld","pea","pei","per", - "rmb","rol","ror","rti","rts","rep","rtl","sbc","sec","sed", "sei","smb","sta","stx","sty","stz","sep","stp","tax","tay", "trb","tsb","tsx","txa","txs","tya","txy","tyx","tcd","tdc", "tcs","tsc","wai","wdb","xba","xce", + /* pseudo opcodes */ + ".byt",".word",".asc",".dsb", ".(", ".)", "*=", ".text",".data",".bss", ".zero",".fopt", ".byte", ".end", ".list", ".xlist", ".dupb", ".blkb", ".db", ".dw", ".align",".block", ".bend",".al",".as",".xl",".xs", ".bin", ".aasc", ".code", @@ -103,29 +111,20 @@ char *kt[] ={ }; -/* arithmetic operators (purely for listing, parsing is done programmatically */ -char *arith_ops[] = { - "", "+", "-", // 0,1,2 - "*", "/", // 3,4 - ">>", "<<", // 5,6 - "<", ">", "=", // 7,8,9 - "<=", ">=", "<>", // 10,11,12 - "&", "^", "|", // 13,14,15 - "&&", "||", "==", "!=", "!" // 16,17,18 (9),19 (12),20 (NYI) -}; - -/* length of arithmetic operators indexed by operator number */ -static int lp[] = { 0, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, - 1 }; +/* the values here are constants that, for the positive values, define the index of the respective keywords + * in the keyword_table[]. The keywords are identified in the input stream by the tokenizer, and then given + * to the pass 1 parser. 
+ */ /* mvn and mvp are handled specially, they have a weird syntax */ #define Kmvp 38 #define Kmvn Kmvp+1 -/* index into token array for pseudo-ops */ -/* last valid mnemonic */ +/* index of last valid cpu assembly mnemonic */ #define Lastbef 93 +/* index into keyword_table[] (token array) for pseudo-opcodes */ + #define Kbyt Lastbef+1 #define Kword Lastbef+2 #define Kasc Lastbef+3 @@ -190,8 +189,71 @@ int number_of_valid_tokens = Anzkey; static int ktp[] = { 0, 3, 17, 25, 28, 29, 29, 29, 29, 32, 34, 34, 38, 40, 41, 42, 58, 58, 65, 76, 90, 90, 90, 92, 94, 94, 94, Anzkey }; +/********************************************************************/ + +/* arithmetic operators (purely for listing, parsing is done programmatically */ +char *arith_ops[] = { + "", "+", "-", // 0,1,2 + "*", "/", // 3,4 + ">>", "<<", // 5,6 + "<", ">", "=", // 7,8,9 + "<=", ">=", "<>", // 10,11,12 + "&", "^", "|", // 13,14,15 + "&&", "||", "==", "!=", "!" // 16,17,18 (9),19 (12),20 (NYI) +}; + +/* length of arithmetic operators indexed by operator number */ +static int lp[] = { 0, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, + 1 }; + +/********************************************************************/ + +/* addressing modes */ + +/* an addressing mode is this number: + * + * 00 = implied + * 01 = zero page + * 02 = zero page,x + * 03 = direct page,y* + * 04 = direct page (indirect)* + * 05 = (indirect,x) + * 06 = (indirect),y + * 07 = immediate (8-bit) + * 08 = absolute + * 09 = absolute,x + * 10 = absolute,y + * 11 = relative + * 12 = (indirect-16) i.e., jmp (some_vector) + * 13 = (absolute,x)* + * 14 = zero page+relative test'n'branch ^ + * 15 = zero page clear'n'set'bit ^ + * 16 = relative long* + * 17 = absolute long* + * 18 = absolute long,x* + * 19 = stack relative* + * 20 = stack relative (indirect),y* + * 21 = direct page (indirect long)* + * 22 = direct page (indirect long),y* + * 23 = (indirect long) + * + */ +/* number of addressing modes */ #define Admodes 24 +/* 
cross check: instruction should be this many bytes long in total */ +/* indexed by addressing mode */ +static int len_by_addr_mode[] ={ 1,2,2,2,2,2,2,2,3,3,3,2,3,3,3,2, + /* new modes */ 3,4,4,2,2,2,2,3 }; + +/* indicates absolute->zp optimizable addressing modes (abs->zp) */ +/* indexed by addressing mode */ +static int opt[] ={ -1,-1,-1,-1,-1,-1,-1,-1,1,2,3,-1,4,5,-1,-1, + /*new*/ -1,8,9,-1,-1,-1,-1,-1 }; /* abs -> zp */ + + + + /* * opcodes for each addressing mode * high byte: supported architecture (no bits = original NMOS 6502) @@ -227,6 +289,9 @@ static int ktp[] = { 0, 3, 17, 25, 28, 29, 29, 29, 29, 32, 34, 34, 38, 40, 41, * 23 = (indirect long) */ + + + static int ct[Lastbef+1][Admodes] ={ /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 imm */ { -1, 0x65,0x75,-1,0x172,0x61,0x71,0x469,0x6d,0x7d,0x79,-1, -1, -1, -1, -1, -1,0x26f,0x27f,0x263,0x273,0x267,0x277,-1 }, /*adc*/ @@ -372,15 +437,6 @@ static int xt[AnzAlt][2] ={ /* Alternativ Adr-Modes */ { 8, 16 } /* abs -> relong */ }; -/* cross check: instruction should be this many bytes long in total */ -/* indexed by addressing mode */ -static int le[] ={ 1,2,2,2,2,2,2,2,3,3,3,2,3,3,3,2, - /* new modes */ 3,4,4,2,2,2,2,3 }; - -/* indicates absolute->zp optimizable addressing modes (abs->zp) */ -/* indexed by addressing mode */ -static int opt[] ={ -1,-1,-1,-1,-1,-1,-1,-1,1,2,3,-1,4,5,-1,-1, - /*new*/ -1,8,9,-1,-1,-1,-1,-1 }; /* abs -> zp */ /*********************************************************************************************/ /* pass 1 */ @@ -958,7 +1014,7 @@ fprintf(stderr, "E_NODEF pass1 xat.c\n"); er = E_SYNTAX; else { /* ok, get length of instruction */ - bl = le[am]; + bl = len_by_addr_mode[am]; /* and add one for 65816 special instruction modes */ if (((ct[n][am] & 0x400) && memode) || ((ct[n][am] & 0x800) && xmode)) { @@ -1829,7 +1885,7 @@ fprintf(stderr, if (!bl) er = E_SYNTAX; else { - bl = le[am]; + bl = len_by_addr_mode[am]; if (((ct[n][am] & 0x400) && memode) || ((ct[n][am] 
& 0x800) && xmode)) { bl++; @@ -1900,7 +1956,7 @@ fprintf(stderr, "address mode: %i address: %i\n", am, vv[0]); er = E_ILLPOINTER; } else { /*printf("am=11, pc=%04x, vv[0]=%04x, segment=%d\n",pc[segment],vv[0], segment);*/ - v = vv[0] - pc[segment] - le[am]; + v = vv[0] - pc[segment] - len_by_addr_mode[am]; if (((v & 0xff80) != 0xff80) && (v & 0xff80) && (am == 11)) er = E_RANGE; @@ -1910,7 +1966,7 @@ fprintf(stderr, "address mode: %i address: %i\n", am, vv[0]); } } } else if (am == 14) { - if (vv[0] & 0xfff8 || vv[1] & 0xff00) + if ((vv[0] & 0xfff8) || (vv[1] & 0xff00)) er = E_RANGE; else if ((segment != SEG_ABS) && (rlt[0] || !rlt[2])) { er = E_ILLPOINTER; @@ -1930,7 +1986,7 @@ fprintf(stderr, "address mode: %i address: %i\n", am, vv[0]); /*if(rlt[1]) printf("relocation 1 byte %04x at pc=$%04x, value now =$%04x\n",rlt[1],pc[segment]+1,*vv); */ if (rlt[1]) u_set(pc[segment] + 1, rlt[1], lab[1], 1); - if (vv[0] & 0xfff8 || vv[1] & 0xff00) + if ((vv[0] & 0xfff8) || (vv[1] & 0xff00)) er = E_OVERFLOW; else { t[0] = t[0] | (vv[0] << 4); @@ -1989,20 +2045,20 @@ int b_term(char *s, int *v, int *l, int pc) { * token sequence in *l * * Input params: - * s source input line - * t output token sequence buffer - * l return length of output token sequence here - * pc the current PC to set address labels to that value - * nk return number of comma in the parameters - * na1 asc text count returned - * na2 total byte count in asc texts returned - * af arithmetic flag: 0=do label definitions, parse opcodes and params; - * 1=only tokenize parameters, for b_term() call from the preprocessor - * for arithmetic conditions + * src source input line + * out output token sequence buffer + * out_token_len return length of output token sequence here + * pc the current PC to set address labels to that value + * out_comma_count return number of comma in the parameters + * out_cnt_strings asc text count returned + * out_cnt_chars_in_strings total byte count in asc texts returned + * af 
arithmetic flag: 0=do label definitions, parse opcodes and params; + * 1=only tokenize parameters, for b_term() call from the preprocessor + * for arithmetic conditions * bytep ??? */ -static int t_conv(signed char *s, signed char *t, int *l, int pc, int *nk, - int *na1, int *na2, int af, int *bytep) { +static int t_conv(signed char *src, signed char *out, int *out_token_len, int pc, int *out_comma_count, + int *out_cnt_strings, int *out_cnt_chars_in_strings, int af, int *bytep) { static int v, f; static int operand, o; int fl, afl; @@ -2010,7 +2066,7 @@ static int t_conv(signed char *s, signed char *t, int *l, int pc, int *nk, int ud; /* counts undefined labels */ int n; /* label number to be passed between l_def (definition) and l_set (set the value) */ int byte; - int uz; /* unused at the moment */ +// int uz; /* unused at the moment */ /*static unsigned char cast;*/ /* ich verstehe deutsch, aber verstehen andere leute nicht; so, werde ich @@ -2019,52 +2075,53 @@ static int t_conv(signed char *s, signed char *t, int *l, int pc, int *nk, comments ... 
Cameron */ /* note that I don't write so good tho' ;) */ - *nk = 0; /* comma count */ - *na1 = 0; /* asc text count */ - *na2 = 0; /* total bytecount in asc texts */ + *out_comma_count = 0; /* comma count */ + *out_cnt_strings = 0; /* asc text count */ + *out_cnt_chars_in_strings = 0; /* total bytecount in asc texts */ ll = 0; er = E_OK; /* error state */ p = 0; q = 0; - ud = uz = byte = 0; + ud = 0; + byte = 0; mk = 0; /* 0 = add'l commas ok */ fl = 0; /* 1 = pass text thru */ afl = 0; /* pointer flag for label */ // skip leading whitespace - while (isspace(s[p])) + while (isspace(src[p])) p++; n = T_END; /*cast='\0';*/ if (!af) { - while (s[p] != '\0' && s[p] != ';') { + while (src[p] != '\0' && src[p] != ';') { //printf("CONV: %s\n", s); - if (s[p] == ':') { + if (src[p] == ':') { // this is a ca65 unnamed label - if ((er = l_def((char*) s + p, &ll, &n, &f))) + if ((er = l_def((char*) src + p, &ll, &n, &f))) break; l_set(n, pc, segment); /* set as address value */ - t[q++] = T_DEFINE; - t[q++] = n & 255; - t[q++] = (n >> 8) & 255; + out[q++] = T_DEFINE; + out[q++] = n & 255; + out[q++] = (n >> 8) & 255; n = 0; p += ll; - while (isspace(s[p])) + while (isspace(src[p])) p++; // end of line - if (s[p] == 0 || s[p] == ';') { + if (src[p] == 0 || src[p] == ';') { break; } } /* is keyword? */ - if (!(er = t_keyword(s + p, &ll, &n))) + if (!(er = t_keyword(src + p, &ll, &n))) break; /* valid syntax, but just not a real token? 
*/ @@ -2073,57 +2130,57 @@ static int t_conv(signed char *s, signed char *t, int *l, int pc, int *nk, // if so, try to understand as label // it returns the label number in n - if ((er = l_def((char*) s + p, &ll, &n, &f))) + if ((er = l_def((char*) src + p, &ll, &n, &f))) break; p += ll; - while (isspace(s[p])) + while (isspace(src[p])) p++; - if (s[p] == '=') { + if (src[p] == '=') { /*printf("Found = @%s\n",s+p);*/ - t[q++] = T_OP; - t[q++] = n & 255; - t[q++] = (n >> 8) & 255; - t[q++] = '='; + out[q++] = T_OP; + out[q++] = n & 255; + out[q++] = (n >> 8) & 255; + out[q++] = '='; p++; ll = n = 0; break; - } else if (s[p] == ':' && s[p + 1] == '=') /* support := label assignments (ca65 compatibility) */ + } else if (src[p] == ':' && src[p + 1] == '=') /* support := label assignments (ca65 compatibility) */ { /*printf("Found := @%s\n", s+p);*/ - t[q++] = T_OP; - t[q++] = n & 255; - t[q++] = (n >> 8) & 255; - t[q++] = '='; + out[q++] = T_OP; + out[q++] = n & 255; + out[q++] = (n >> 8) & 255; + out[q++] = '='; p += 2; ll = n = 0; break; - } else if (f && s[p] != '\0' && s[p + 1] == '=') { - t[q++] = T_OP; - t[q++] = n & 255; - t[q++] = (n >> 8) & 255; - t[q++] = s[p]; + } else if (f && src[p] != '\0' && src[p + 1] == '=') { + out[q++] = T_OP; + out[q++] = n & 255; + out[q++] = (n >> 8) & 255; + out[q++] = src[p]; p += 2; ll = n = 0; break; - } else if (s[p] == ':') /* to support label: ... syntax */ + } else if (src[p] == ':') /* to support label: ... syntax */ { p++; - while (s[p] == ' ') + while (src[p] == ' ') p++; l_set(n, pc, segment); /* set as address value */ - t[q++] = T_DEFINE; - t[q++] = n & 255; - t[q++] = (n >> 8) & 255; + out[q++] = T_DEFINE; + out[q++] = n & 255; + out[q++] = (n >> 8) & 255; n = 0; } else { /* label ... 
syntax */ l_set(n, pc, segment); /* set as address value */ - t[q++] = T_DEFINE; - t[q++] = n & 255; - t[q++] = (n >> 8) & 255; + out[q++] = T_DEFINE; + out[q++] = n & 255; + out[q++] = (n >> 8) & 255; n = 0; } @@ -2134,14 +2191,14 @@ static int t_conv(signed char *s, signed char *t, int *l, int pc, int *nk, } } - if (s[p] == '\0' || s[p] == ';') { + if (src[p] == '\0' || src[p] == ';') { er = E_NOLINE; ll = 0; } else if (!er) { p += ll; if (ll) { - t[q++] = n & 0xff; + out[q++] = n & 0xff; /* if( (n&0xff) == Kmacro) { t[q++]= (n >> 8) & 0xff; @@ -2152,16 +2209,16 @@ static int t_conv(signed char *s, signed char *t, int *l, int pc, int *nk, operand = 1; // skip whitespace - while (isspace(s[p])) { + while (isspace(src[p])) { p++; } - if (s[p] == '#') { + if (src[p] == '#') { mk = 0; - t[q++] = s[p++]; + out[q++] = src[p++]; // skip following whitespace - while (isspace(s[p])) { + while (isspace(src[p])) { p++; } } @@ -2180,10 +2237,10 @@ static int t_conv(signed char *s, signed char *t, int *l, int pc, int *nk, /* FIXIT2 */ - while (s[p] != '\0' && s[p] != ';' && !er) { + while (src[p] != '\0' && src[p] != ';' && !er) { if (fl) { // pass through text (e.g. for ",y") - t[q++] = s[p++]; + out[q++] = src[p++]; } else { if (operand) { @@ -2191,25 +2248,25 @@ static int t_conv(signed char *s, signed char *t, int *l, int pc, int *nk, addressing mode? !, @, ` operators Note these are not available in ca65, but we only switch off "@" which are used for cheap local labels*/ - if (s[p] == '!' || (s[p] == '@' && !ca65) || s[p] == '`') { + if (src[p] == '!' 
|| (src[p] == '@' && !ca65) || src[p] == '`') { #ifdef DEBUG_CAST - printf("Setting cast to: %c\n", s[p]); + printf("Setting cast to: %c\n", src[p]); #endif - t[q++] = T_CAST; - t[q++] = s[p]; + out[q++] = T_CAST; + out[q++] = src[p]; operand = -operand + 1; p++; - } else if (s[p] == '(' || s[p] == '-' || s[p] == '>' - || s[p] == '<' || s[p] == '[') { - t[q++] = s[p++]; + } else if (src[p] == '(' || src[p] == '-' || src[p] == '>' + || src[p] == '<' || src[p] == '[') { + out[q++] = src[p++]; operand = -operand + 1; /* invert to become reinverted */ - } else if (s[p] == '*') { - t[q++] = s[p++]; + } else if (src[p] == '*') { + out[q++] = src[p++]; } else /* maybe it's a label Note that for ca65 cheap local labels, we check for "@" */ - if (isalpha(s[p]) || s[p] == '_' || (s[p] == ':' && collab) - || ((s[p] == ':' || s[p] == '@') && ca65)) { + if (isalpha(src[p]) || src[p] == '_' || (src[p] == ':' && collab) + || ((src[p] == ':' || src[p] == '@') && ca65)) { int p2 = 0; if (n == (Klistbytes & 0xff)) { @@ -2220,44 +2277,45 @@ static int t_conv(signed char *s, signed char *t, int *l, int pc, int *nk, // a T_CONSTANT. 
Which would also fix the listing of this constant // (which is currently listed as "0") static char *unlimited = "unlimited"; - while (s[p + p2] != 0 && unlimited[p2] != 0 - && s[p + p2] == unlimited[p2]) + while (src[p + p2] != 0 && unlimited[p2] != 0 + && src[p + p2] == unlimited[p2]) p2++; } if (p2 == 9) { // length of "unlimited" er = E_OK; // found constant - wval(q, 0, 'd'); + wvalo(q, 0, 'd'); + //q += write_val(out + q, 0, 'd'); p += p2; } else { //m=n; - er = l_search((char*) s + p, &ll, &n, &v, &afl); + er = l_search((char*) src + p, &ll, &n, &v, &afl); if (er == E_NODEF && undefok) { - lg_toglobal(s + p); + lg_toglobal(src + p); } if (!er) { if (afl) { - t[q++] = T_POINTER; - t[q++] = afl & 255; - t[q++] = v & 255; - t[q++] = (v >> 8) & 255; - t[q++] = n & 255; /* cheap fix for listing */ - t[q++] = (n >> 8) & 255; /* why is the label already resolved in t_conv? */ + out[q++] = T_POINTER; + out[q++] = afl & 255; + out[q++] = v & 255; + out[q++] = (v >> 8) & 255; + out[q++] = n & 255; /* cheap fix for listing */ + out[q++] = (n >> 8) & 255; /* why is the label already resolved in t_conv? 
*/ } else { - t[q++] = T_LABEL; - t[q++] = n & 255; - t[q++] = (n >> 8) & 255; + out[q++] = T_LABEL; + out[q++] = n & 255; + out[q++] = (n >> 8) & 255; /*wval(q,v, 0);*/ } } else if (er == E_NODEF) { #ifdef DEBUG_AM -fprintf(stderr, "could not find %s\n", (char *)s+p); +fprintf(stderr, "could not find %s\n", (char *)src+p); #endif - t[q++] = T_LABEL; - t[q++] = n & 255; - t[q++] = (n >> 8) & 255; + out[q++] = T_LABEL; + out[q++] = n & 255; + out[q++] = (n >> 8) & 255; /* if(afl==SEG_ZEROUNDEF) uz++; */ @@ -2266,64 +2324,72 @@ fprintf(stderr, "could not find %s\n", (char *)s+p); } p += ll; } - } else if (s[p] <= '9' - && (s[p] > '0' || (s[p] == '0' && !ctypes))) { - tg_dez(s + p, &ll, &v); + } else if (src[p] <= '9' + && (src[p] > '0' || (src[p] == '0' && !ctypes))) { + // parse a decimal value + tg_dez(src + p, &ll, &v); p += ll; - wval(q, v, 'd'); + wvalo(q, v, 'd'); + //q += write_val(out + q, v, 'd'); } else /* handle encodings: hex, binary, octal, quoted strings */ - switch (s[p]) { + switch (src[p]) { case '0': // only gets here when "ctypes" is set, and starts with 0 // we here check for the C stype "0xHEX" and "0OCTAL" encodings - if ('x' == tolower(s[p + 1])) { + if ('x' == tolower(src[p + 1])) { // c-style hex - tg_hex(s + p + 2, &ll, &v); + tg_hex(src + p + 2, &ll, &v); p += 2 + ll; - wval(q, v, '$'); - } else if (isdigit(s[p + 1])) { + wvalo(q, v, '$'); + //q += write_val(out + q, v, '$'); + } else if (isdigit(src[p + 1])) { // c-style octal if digit follows - tg_oct(s + p + 1, &ll, &v); + tg_oct(src + p + 1, &ll, &v); p += 1 + ll; - wval(q, v, '&'); + wvalo(q, v, '&'); + //q += write_val(out + q, v, '&'); } else { // else use decimal (0) - tg_dez(s + p, &ll, &v); + tg_dez(src + p, &ll, &v); p += ll; - wval(q, v, 'd'); + wvalo(q, v, 'd'); + //q += write_val(out + q, v, 'd'); } break; case '$': - tg_hex(s + p + 1, &ll, &v); + tg_hex(src + p + 1, &ll, &v); p += 1 + ll; - wval(q, v, '$'); + wvalo(q, v, '$'); + //q += write_val(out + q, v, '$'); break; case 
'%': - tg_bin(s + p + 1, &ll, &v); + tg_bin(src + p + 1, &ll, &v); p += 1 + ll; - wval(q, v, '%'); + wvalo(q, v, '%'); + //q += write_val(out + q, v, '%'); break; case '&': - tg_oct(s + p + 1, &ll, &v); + tg_oct(src + p + 1, &ll, &v); p += 1 + ll; - wval(q, v, '&'); + wvalo(q, v, '&'); + //q += write_val(out + q, v, '&'); break; case '\'': case '\"': - er = tg_asc(s + p, t + q, &q, &p, na1, na2, n); + er = tg_asc(src + p, out + q, &q, &p, out_cnt_strings, out_cnt_chars_in_strings, n); break; case ',': if (mk) - while (s[p] != '\0' && s[p] != ';') { - while (s[p] == ' ') + while (src[p] != '\0' && src[p] != ';') { + while (src[p] == ' ') p++; - *nk += (s[p] == ','); - t[q++] = s[p++]; + *out_comma_count += (src[p] == ','); + out[q++] = src[p++]; } else { - *nk += 1; - t[q++] = s[p++]; + *out_comma_count += 1; + out[q++] = src[p++]; } break; default: @@ -2335,18 +2401,18 @@ fprintf(stderr, "could not find %s\n", (char *)s+p); } else /* operator */ { o = 0; - if (s[p] == ')' || s[p] == ']') { - t[q++] = s[p++]; + if (src[p] == ')' || src[p] == ']') { + out[q++] = src[p++]; operand = -operand + 1; - } else if (s[p] == ',') { - t[q++] = s[p++]; + } else if (src[p] == ',') { + out[q++] = src[p++]; if (mk) { // if only one comma, pass through all following text - esp. 
",y" or ",x" etc fl++; } - *nk += 1; + *out_comma_count += 1; } else - switch (s[p]) { + switch (src[p]) { case '+': o = 1; break; @@ -2360,11 +2426,11 @@ fprintf(stderr, "could not find %s\n", (char *)s+p); o = 4; break; case '!': - if (s[p + 1] == '=') + if (src[p + 1] == '=') o = 12; break; case '<': - switch (s[p + 1]) { + switch (src[p + 1]) { case '<': o = 6; break; @@ -2380,7 +2446,7 @@ fprintf(stderr, "could not find %s\n", (char *)s+p); } break; case '>': - switch (s[p + 1]) { + switch (src[p + 1]) { case '>': o = 5; break; @@ -2396,7 +2462,7 @@ fprintf(stderr, "could not find %s\n", (char *)s+p); } break; case '=': - switch (s[p + 1]) { + switch (src[p + 1]) { case '<': o = 10; break; @@ -2413,13 +2479,13 @@ fprintf(stderr, "could not find %s\n", (char *)s+p); } break; case '&': - if (s[p + 1] == '&') + if (src[p + 1] == '&') o = 16; else o = 13; break; case '|': - if (s[p + 1] == '|') + if (src[p + 1] == '|') o = 17; else o = 15; @@ -2432,13 +2498,13 @@ fprintf(stderr, "could not find %s\n", (char *)s+p); break; } if (o) { - t[q++] = o; + out[q++] = o; p += lp[o]; } operand = -operand + 1; } - while (s[p] == ' ') + while (src[p] == ' ') p++; } } @@ -2462,23 +2528,23 @@ fprintf(stderr, "could not find %s\n", (char *)s+p); } } - if (s[p] == ';') { + if (src[p] == ';') { /* handle comments */ /* first find out how long */ int i; - for (i = p + 1; s[i] != '\0'; i++) + for (i = p + 1; src[i] != '\0'; i++) ; i = i - p; /* actual length of the comment, including zero-byte terminator */ /*if (i >= 1) {*/ /* there actually is a comment */ - t[q++] = T_COMMENT; - t[q++] = i & 255; - t[q++] = (i >> 8) & 255; - memcpy(t + q, s + p + 1, i); /* also copy zero terminator, used in listing */ + out[q++] = T_COMMENT; + out[q++] = i & 255; + out[q++] = (i >> 8) & 255; + memcpy(out + q, src + p + 1, i); /* also copy zero terminator, used in listing */ q += i; /*}*/ } - t[q++] = T_END; + out[q++] = T_END; /* FIXME: this is an unholy union of two "!" 
implementations :-( */ /* FIXME FIXME FIXME ... if (operand==1) { @@ -2486,7 +2552,7 @@ fprintf(stderr, "could not find %s\n", (char *)s+p); t[q++]=cast; } */ - *l = q; + *out_token_len = q; if (bytep) *bytep = byte; return (er); @@ -2531,10 +2597,10 @@ static int t_keyword(signed char *s, int *l, int *n) { // check all entries in opcode table from start to end for that hash code while (i < hash) { j = 0; - while (kt[i][j] != '\0' && kt[i][j] == tolower(s[j])) + while (keyword_table[i][j] != '\0' && keyword_table[i][j] == tolower(s[j])) j++; - if ((kt[i][j] == '\0') + if ((keyword_table[i][j] == '\0') && ((i == Kpcdef) || ((s[j] != '_') && !isalnum(s[j])))) break; i++; @@ -2572,133 +2638,163 @@ static int t_keyword(signed char *s, int *l, int *n) { return (i == hash ? E_NOKEY : E_OK); } -static void tg_dez(s, l, v) - signed char *s;int *l, *v; { +/******************************************************************** + * tokenize various value / number formats + */ + +/* + * tokenize a decimal integer value + */ +static void tg_dez(signed char *src, int *out_cnt_consumed, int *out_value) { int i = 0, val = 0; - while (isdigit(s[i])) - val = val * 10 + (s[i++] - '0'); + while (isdigit(src[i])) + val = val * 10 + (src[i++] - '0'); - *l = i; - *v = val; + *out_cnt_consumed = i; + *out_value = val; } -static void tg_bin(signed char *s, int *l, int *v) { +/* + * tokenize a binary value + */ +static void tg_bin(signed char *src, int *out_cnt_consumed, int *out_value) { int i = 0, val = 0; - while (s[i] == '0' || s[i] == '1') - val = val * 2 + (s[i++] - '0'); + while (src[i] == '0' || src[i] == '1') + val = val * 2 + (src[i++] - '0'); - *l = i; - *v = val; + *out_cnt_consumed = i; + *out_value = val; } -static void tg_oct(signed char *s, int *l, int *v) { +/* + * tokenize an octal value + */ +static void tg_oct(signed char *src, int *out_cnt_consumed, int *out_value) { int i = 0, val = 0; - while (s[i] < '8' && s[i] >= '0') - val = val * 8 + (s[i++] - '0'); + while (src[i] 
< '8' && src[i] >= '0') + val = val * 8 + (src[i++] - '0'); - *l = i; - *v = val; + *out_cnt_consumed = i; + *out_value = val; } -static void tg_hex(signed char *s, int *l, int *v) { +/* + * tokenize a hex value + */ +static void tg_hex(signed char *src, int *out_cnt_consumed, int *out_value) { int i = 0, val = 0; - while ((s[i] >= '0' && s[i] <= '9') - || (tolower(s[i]) <= 'f' && tolower(s[i]) >= 'a')) { - val = val * 16 + (s[i] <= '9' ? s[i] - '0' : tolower(s[i]) - 'a' + 10); + while ((src[i] >= '0' && src[i] <= '9') + || (tolower(src[i]) <= 'f' && tolower(src[i]) >= 'a')) { + val = val * 16 + (src[i] <= '9' ? src[i] - '0' : tolower(src[i]) - 'a' + 10); i++; } - *l = i; - *v = val; + *out_cnt_consumed = i; + *out_value = val; } /* * tokenize a string - handle two delimiter types, ' and " + * + * Depending on the base token (e.g. pseudo opcode) the string is either converted to the system charset or not */ -static int tg_asc(signed char *s, signed char *t, int *q, int *p, int *na1, - int *na2, int n) { +static int tg_asc(signed char *src, signed char *out, int *out_cnt_written, int *out_cnt_consumed, int *inout_cnt_strings, + int *inout_cnt_chars_in_strings, int base_token) { - int er = E_OK, i = 0, j = 0, bs = 0; + int er = E_OK, rd_cnt = 0, wr_cnt = 0, backslash_flag = 0; - signed char delimiter = s[i++]; + signed char delimiter = src[rd_cnt++]; #ifdef DEBUG_AM -fprintf(stderr, "tg_asc token = %i\n", n); +fprintf(stderr, "tg_asc token = %i\n", base_token); #endif - t[j++] = '"'; /* pass2 token for string */ - j++; /* skip place for length */ + out[wr_cnt++] = '"'; /* pass2 token for string */ + wr_cnt++; /* skip place for length to be filled in later */ - while (s[i] != '\0' && (bs || s[i] != delimiter)) { + /* + * Continue as long as there is input (src[] != 0), and there is not a delimiter, or the delimiter is backslash-escaped + */ + while (src[rd_cnt] != '\0' && (backslash_flag || src[rd_cnt] != delimiter)) { - /* implement backslashed quotes for 
2.4 */ - if (n != Kbin && s[i] == '\\' && !bs && !xa23) { - bs = 1; - i++; + /* implement backslashed quotes for 2.4, but not for strings to be interpreted binary (Kbin) */ + if (base_token != Kbin && src[rd_cnt] == '\\' && !backslash_flag && !xa23) { + backslash_flag = 1; + rd_cnt++; continue; - } else - bs = 0; - /* do NOT convert for Kbin or Kaasc, or for initial parse */ - if (!n || n == Kbin || n == Kaasc) { - t[j++] = s[i]; + } else { + backslash_flag = 0; + } + + /* do NOT convert charset for Kbin or Kaasc, or for initial parse */ + if (!base_token || base_token == Kbin || base_token == Kaasc) { + out[wr_cnt++] = src[rd_cnt]; /* XXX 2.4 implement option for ^ for backwards compatibility */ - } else if (ca65 || (!xa23 && !mask) || s[i] != '^') { /* no escape code "^" - TODO: does ca65 has an escape code */ - t[j++] = convert_char(s[i]); + } else if (ca65 || (!xa23 && !mask) || src[rd_cnt] != '^') { /* no escape code "^" - TODO: does ca65 has an escape code */ + out[wr_cnt++] = convert_char(src[rd_cnt]); } else { /* escape code */ - signed char payload = s[i + 1]; + signed char payload = src[rd_cnt + 1]; switch (payload) { case '\0': er = E_SYNTAX; break; case '\"': if (payload == delimiter) { - t[j++] = convert_char(payload); - i++; + out[wr_cnt++] = convert_char(payload); + rd_cnt++; } else { er = E_SYNTAX; } break; case '\'': if (payload == delimiter) { - t[j++] = convert_char(payload); - i++; + out[wr_cnt++] = convert_char(payload); + rd_cnt++; } else { er = E_SYNTAX; } break; case '^': - t[j++] = convert_char('^'); - i++; + out[wr_cnt++] = convert_char('^'); + rd_cnt++; break; default: - t[j++] = convert_char(payload & 0x1f); - i++; + out[wr_cnt++] = convert_char(payload & 0x1f); + rd_cnt++; break; } } - i++; + rd_cnt++; } - if (j == 3) /* optimize single byte string to value */ + + /* post parse optimization / handling */ + + if (wr_cnt == 3) /* optimize single byte string to value */ { - t[0] = T_VALUE; - t[1] = t[2]; - t[2] = 0; - t[3] = 0; - t[4] = 
delimiter; - j += 2; + //wr_cnt = wr_cnt - 3 + write_val(out, out[2], delimiter); + + out[0] = T_VALUE; + out[1] = out[2]; + out[2] = 0; + out[3] = 0; + out[4] = delimiter; + wr_cnt += 2; } else { /* handle as string */ - t[1] = j - 2; - *na1 += 1; - *na2 += j - 2; + /* length of string */ + out[1] = wr_cnt - 2; + /* number of strings */ + *inout_cnt_strings += 1; + /* number of chars in string */ + *inout_cnt_chars_in_strings += wr_cnt - 2; } - if (s[i] == delimiter) { /* in case of no error */ - i++; /* skip ending delimiter */ + if (src[rd_cnt] == delimiter) { /* in case of no error */ + rd_cnt++; /* skip ending delimiter */ } - *q += j; - *p += i; + *out_cnt_written += wr_cnt; + *out_cnt_consumed += rd_cnt; return (er); } diff --git a/xa/src/xat.h b/xa/src/xat.h index 6e88bdd..b88a834 100644 --- a/xa/src/xat.h +++ b/xa/src/xat.h @@ -1,6 +1,6 @@ /* xa65 - 65xx/65816 cross-assembler and utility suite * - * Copyright (C) 1989-1997 André Fachat (a.fachat@physik.tu-chemnitz.de) + * Copyright (C) 1989-1997 André Fachat (a.fachat@physik.tu-chemnitz.de) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,7 +25,7 @@ int t_p1(signed char *s, signed char *t, int *ll, int *al); int t_p2_l(signed char *t, int *ll, int *al); int b_term(char *s, int *v, int *l, int pc); -extern char *kt[]; // table of key words, needed for listing +extern char *keyword_table[]; // table of key words, needed for listing extern char *arith_ops[]; // table of arithmetic operators, needed for listing extern int number_of_valid_tokens; // as it says, in the "kt" table diff --git a/xa/src/xau.c b/xa/src/xau.c index 0ba0f9e..9ba4e82 100644 --- a/xa/src/xau.c +++ b/xa/src/xau.c @@ -22,10 +22,11 @@ #include #include -#include "xad.h" -#include "xau.h" #include "xah.h" #include "xal.h" +#include "xad.h" + +#include "xau.h" #undef DEBUG_UNDEF