From 6eb55586349e9daa34cd4430e3b694046ac64529 Mon Sep 17 00:00:00 2001 From: David Schmenk Date: Wed, 20 Dec 2017 17:51:26 -0800 Subject: [PATCH] on-the-metal compiler initial checkin --- src/inc/cmdsys.plh | 1 + src/toolsrc/codegen.pla | 793 +++++++++++++++++++++++++ src/toolsrc/lex.pla | 331 +++++++++++ src/toolsrc/parse.pla | 1249 +++++++++++++++++++++++++++++++++++++++ src/toolsrc/plasm.pla | 340 +++++++++++ 5 files changed, 2714 insertions(+) create mode 100644 src/toolsrc/codegen.pla create mode 100644 src/toolsrc/lex.pla create mode 100644 src/toolsrc/parse.pla create mode 100644 src/toolsrc/plasm.pla diff --git a/src/inc/cmdsys.plh b/src/inc/cmdsys.plh index c0e3a8f..c490a8f 100644 --- a/src/inc/cmdsys.plh +++ b/src/inc/cmdsys.plh @@ -4,6 +4,7 @@ import cmdsys // const FALSE = 0 const TRUE = not FALSE + const NULL = 0 // // Machine ID values // diff --git a/src/toolsrc/codegen.pla b/src/toolsrc/codegen.pla new file mode 100644 index 0000000..9796af7 --- /dev/null +++ b/src/toolsrc/codegen.pla @@ -0,0 +1,793 @@ +// +// Symbol table +// +//def dumpsym(idptr, idcnt)#0 +// while idcnt +// prword(idptr=>idval) +// cout(' ') +// prbyte(idptr->idtype) +// cout(' ') +// prstr(@idptr->idname) +// cout('=') +// if idptr->idtype & ADDR_TYPE +// if idptr=>idval & IS_CTAG +// prword((ctag_tbl:[idptr=>idval & MASK_CTAG] & MASK_CTAG) + codebuff) +// else +// prword(idptr=>idval + codebuff) +// fin +// else +// prword(idptr=>idval) +// fin +// crout +// idptr = idptr + idptr->idname + idrecsz +// idcnt-- +// loop +//end +def idmatch(nameptr, len, idptr, idcnt) + byte i + + while idcnt + if len == idptr->idname + for i = 1 to len + if nameptr->[i - 1] <> idptr->idname.[i] + break + fin + next + if i > len + return idptr + fin + fin + idptr = idptr + idptr->idname + idrecsz + idcnt-- + loop + return 0 +end +def id_lookup(nameptr, len) + word idptr + + idptr = idmatch(nameptr, len, idlocal_tbl, locals) + if idptr + return idptr + fin + idptr = idmatch(nameptr, len, idglobal_tbl, globals) + if idptr + return idptr + fin + return parse_err(@undecl_id) +end +def idglobal_lookup(nameptr, len) + return idmatch(nameptr, len, idglobal_tbl, globals) +end +def idlocal_add(namestr, len, type, size) + if idmatch(namestr, len, @idlocal_tbl, locals); return parse_err(@dup_id); fin + lastlocal=>idval = framesize + lastlocal->idtype = type | LOCAL_TYPE + nametostr(namestr, len, lastlocal + idname) + locals++ + lastlocal = lastlocal + idrecsz + len + if lastlocal > idlocal_tbl + idlocal_tblsz + prstr(@local_sym_overflw) + exit + fin + framesize = framesize + size + if framesize > 255 + return parse_err(@local_overflw) + fin + return TRUE +end +def iddata_add(namestr, len, type, size) + if idmatch(namestr, len, idglobal_tbl, globals); return parse_err(@dup_id); fin + lastglobal=>idval = datasize + lastglobal->idtype = type + nametostr(namestr, len, lastglobal + idname) + emit_iddata(datasize, size, lastglobal + idname) + globals++ + lastglobal = lastglobal + idrecsz + len + if lastglobal > idglobal_tbl + idglobal_tblsz + prstr(@global_sym_overflw) + exit + fin + datasize = datasize + size + return TRUE +end +def iddata_size(type, varsize, initsize)#0 + if varsize > initsize + datasize = datasize + varsize + emit_data(0, 0, 0, varsize - initsize) + else + datasize = datasize + initsize + fin +end +def idglobal_add(namestr, len, type, value) + if idmatch(namestr, len, idglobal_tbl, globals); return parse_err(@dup_id); fin + lastglobal=>idval = value + lastglobal->idtype = type + nametostr(namestr, len, lastglobal + idname) + globals++ + lastglobal = lastglobal + idrecsz + len + if lastglobal > idglobal_tbl + idglobal_tblsz + prstr(@global_sym_overflw) + exit + fin + return TRUE +end +def idfunc_add(namestr, len, tag) + return idglobal_add(namestr, len, FUNC_TYPE, tag) +end +def idconst_add(namestr, len, value) + return idglobal_add(namestr, len, CONST_TYPE, value) +end +def idglobal_init#0 + word op + byte i + + idglobal_tbl = heapalloc(IDGLOBALSZ) + idlocal_tbl = heapalloc(IDLOCALSZ) + codebuff = heapalloc(CODEBUFSZ) + codeptr = codebuff + entrypoint = 0 + globals = 0 + lastglobal = idglobal_tbl + codetag = -1 + // + //Init free op sequence list + // + freeop_lst = heapalloc(OPSEQNUM*t_opseq) + op = freeop_lst + for i = OPSEQNUM-1 downto 0 + op=>nextop = op + t_opseq + op = op + t_opseq + next + op=>nextop = NULL +end +def idlocal_init#0 + locals = 0 + framesize = 0 + lastlocal = idlocal_tbl +end +// +// Code tags. Upper bit is IS_RESOLVED flag, lower 15 is offset into codebuff +// Flags are: +// +def ctag_new + if codetag >= ctag_max; return parse_err(@ctag_full); fin + codetag = codetag + 1 + ctag_tbl:[codetag] = 0 // Unresolved, nothing to update yet + return codetag | IS_CTAG +end +def ctag_resolve(ctag)#0 + word updtptr, nextptr + + ctag = ctag & MASK_CTAG // Better be a ctag! + if ctag_tbl:[ctag] & IS_RESOLVED;parse_err(@dup_id); return; fin + updtptr = ctag_tbl:[ctag] & MASK_CTAG + while updtptr + // + // Update list of addresses needing resolution + // + updtptr = updtptr + codebuff + nextptr = *updtptr & MASK_CTAG + if *updtptr & IS_RELATIVE + *updtptr = codeptr - updtptr + else + *updtptr = codeptr + fin + updtptr = nextptr + loop + ctag_tbl:[ctag] = (codeptr - codebuff) | IS_RESOLVED +end +// +// Emit data/bytecode +// +def emit_byte(bval)#0 + ^codeptr = bval + codeptr++ +end +def emit_word(wval)#0 + *codeptr = wval + codeptr = codeptr + 2 +end +def emit_fill(size)#0 + memset(codeptr, 0, size) + codeptr = codeptr + size +end +def emit_op(op)#0 + lastop = op + emit_byte(op) +end +def emit_addr(tag)#0 + word updtptr + + if tag & IS_CTAG + tag = tag & MASK_CTAG + if ctag_tbl:[tag] & IS_RESOLVED + updtptr = (ctag_tbl:[tag] & MASK_CTAG) + codebuff + else + // + // Add to list of tags needing resolution + // + updtptr = ctag_tbl:[tag] & MASK_CTAG + ctag_tbl:[tag] = codeptr - codebuff + fin + emit_word(updtptr) + else + emit_word(tag + codebuff) + fin +end +def emit_reladdr(tag)#0 + word updtptr + + if tag & IS_CTAG + tag = tag & MASK_CTAG + if ctag_tbl:[tag] & IS_RESOLVED + updtptr = ((ctag_tbl:[tag] & MASK_CTAG) + codebuff) - codeptr + else + // + // Add to list of tags needing resolution + // + updtptr = ctag_tbl:[tag] | IS_RELATIVE + ctag_tbl:[tag] = codeptr - codebuff + fin + emit_word(updtptr) + else + emit_word(tag - (codeptr - codebuff)) + fin +end +def emit_iddata(value, size, namestr)#0 + emit_fill(size) +end +def emit_data(vartype, consttype, constval, constsize) + byte i + word size, chrptr + + if consttype == 0 + size = constsize + emit_fill(constsize) + elsif consttype == STR_TYPE + size = constsize + chrptr = constval + constsize-- + emit_byte(constsize) + while constsize > 0 + emit_byte(^chrptr) + chrptr++ + constsize-- + loop + else + if vartype & BYTE_TYPE + size = 1 + emit_byte(constval) + else + size = 2 + if consttype == CONSTADDR_TYPE + emit_addr(constval) + else + emit_word(constval) + fin + fin + fin + return size +end +def emit_const(cval)#0 + if cval == 0 + emit_op($00) + elsif cval > 0 and cval < 256 + emit_op($2A) + emit_byte(cval) + else + emit_op($2C) + emit_word(cval) + fin +end +def emit_constr(str, size)#0 + emit_op($2E) + emit_data(0, STR_TYPE, str, size) +end +def emit_lb#0 + emit_op($60) +end +def emit_lw#0 + emit_op($62) +end +def emit_llb(offset)#0 + emit_op($64) + emit_byte(offset) +end +def emit_llw(offset)#0 + emit_op($66) + emit_byte(offset) +end +def emit_lab(tag, offset)#0 + if tag & IS_CTAG and offset + parse_err(@no_ctag_offst) + else + emit_op($68) + emit_addr(tag+offset) + fin +end +def emit_law(tag, offset)#0 + if tag & IS_CTAG and offset + parse_err(@no_ctag_offst) + else + emit_op($6A) + emit_addr(tag+offset) + fin +end +def emit_sb#0 + emit_op($70) +end +def emit_sw#0 + emit_op($72) +end +def emit_slb(offset)#0 + emit_op($74) + emit_byte(offset) +end +def emit_slw(offset)#0 + emit_op($76) + emit_byte(offset) +end +def emit_dlb(offset)#0 + emit_op($6C) + emit_byte(offset) +end +def emit_dlw(offset)#0 + emit_op($6E) + emit_byte(offset) +end +def emit_sab(tag, offset)#0 + if tag & IS_CTAG and offset + parse_err(@no_ctag_offst) + else + emit_op($78) + emit_addr(tag+offset) + fin +end +def emit_saw(tag, offset)#0 + if tag & IS_CTAG and offset + parse_err(@no_ctag_offst) + else + emit_op($7A) + emit_addr(tag+offset) + fin +end +def emit_dab(tag, offset)#0 + if tag & IS_CTAG and offset + parse_err(@no_ctag_offst) + else + emit_op($7C) + emit_addr(tag+offset) + fin +end +def emit_daw(tag, offset)#0 + if tag & IS_CTAG and offset + parse_err(@no_ctag_offst) + else + emit_op($7E) + emit_addr(tag+offset) + fin +end +def emit_call(tag)#0 + emit_op($54) + emit_addr(tag) +end +def emit_ical#0 + emit_op($56) +end +def emit_localaddr(offset)#0 + emit_op($28) + emit_byte(offset) +end +def emit_globaladdr(tag, offset)#0 + if tag & IS_CTAG and offset + parse_err(@no_ctag_offst) + else + emit_op($26) + emit_addr(tag+offset) + fin +end +def emit_indexbyte#0 + emit_op($02) +end +def emit_indexword#0 + emit_op($1E) +end +def emit_unaryop(op) + when op + is NEG_TKN + emit_op($10); break + is ALT_COMP_TKN + is COMP_TKN + emit_op($12); break + is LOGIC_NOT_TKN + emit_op($20); break + is INC_TKN + emit_op($0C); break + is DEC_TKN + emit_op($0E); break + is BPTR_TKN + emit_op($60); break + is WPTR_TKN + emit_op($62); break + otherwise + return FALSE + wend + return TRUE +end +def emit_binaryop(op) + when op + is MUL_TKN + // + // Replace MUL 2 with SHL 1 + // + if lastop == $2A and ^(codeptr - 1) == 2 // CB 2 + codeptr = codeptr - 1 + emit_byte(1) // CB 1 + emit_op($1A) // SHL + else + emit_op($06) + fin + break + is DIV_TKN + // + // Replace DIV 2 with SHR 1 + // + if lastop == $2A and ^(codeptr - 1) == 2 // CB 2 + codeptr = codeptr - 1 + emit_byte(1) // CB 1 + emit_op($1C) // SHR + else + emit_op($08) + fin + break + is MOD_TKN + emit_op($0A); break + is ADD_TKN + // + // Replace ADD 1 with INCR + // + if lastop == $2A and ^(codeptr - 1) == 1 // CB 1 + codeptr = codeptr - 2 + emit_op($0C) // INC_OP + else + emit_op($02) + fin + break + is SUB_TKN + // + // Replace SUB 1 with DECR + // + if lastop == $2A and ^(codeptr - 1) == 1 // CB 1 + codeptr = codeptr - 2 + emit_op($0E) // DEC_OP + else + emit_op($04) + fin + break + is SHL_TKN + emit_op($1A); break + is SHR_TKN + emit_op($1C); break + is AND_TKN + emit_op($14); break + is OR_TKN + emit_op($16); break + is EOR_TKN + emit_op($18); break + is EQ_TKN + emit_op($40); break + is NE_TKN + emit_op($42); break + is GE_TKN + emit_op($48); break + is LT_TKN + emit_op($46); break + is GT_TKN + emit_op($44); break + is LE_TKN + emit_op($4A); break + is LOGIC_OR_TKN + emit_op($22); break + is LOGIC_AND_TKN + emit_op($24); break + otherwise + return FALSE + wend + return TRUE +end +def emit_brtru(tag)#0 + emit_op($4E) + emit_reladdr(tag) +end +def emit_brfls(tag)#0 + emit_op($4C) + emit_reladdr(tag) +end +def emit_brgt(tag)#0 + emit_op($38) + emit_reladdr(tag) +end +def emit_brlt(tag)#0 + emit_op($3A) + emit_reladdr(tag) +end +def emit_brne(tag)#0 + emit_op($3E) + emit_reladdr(tag) +end +def emit_branch(tag)#0 + emit_op($50) + emit_reladdr(tag) +end +def emit_drop#0 + emit_op($30) +end +def emit_leave#0 + if framesize + emit_op($5A) + else + emit_op($5C) + fin +end +def emit_enter(cparams)#0 + emit_byte(emit_enter.[0]) + emit_byte(emit_enter.[1]) + emit_byte(emit_enter.[2]) + if framesize + emit_op($58) + emit_byte(framesize) + emit_byte(cparams) + fin +end +// +//New/release sequence ops +// +def new_op + word op + op = freeop_lst + if not op + puts("Compiler out of sequence ops!") + return NULL + fin + freeop_lst = freeop_lst=>nextop + op=>nextop = NULL + return op +end +def release_op(op)#0 + if op + op=>nextop = freeop_lst + freeop_lst = op + fin +end +def release_seq(seq)#0 +{ + word op + while seq + op = seq + seq = seq=>nextop + // + //Free this op + // + op=>nextop = freeop_lst + freeop_lst = op + loop +end +// +//Generate a sequence of code +// +def gen_seq(seq, opcode, cval, tag, offsz, type) +{ + word op + + if not seq + seq = new_op + op = seq + else + op = seq + while op=>nextop; op = op=>nextop; loop + op=>nextop = new_op + op = op=>nextop + fin + op->code = opcode + op=>val = cval + op=>tag = tag + op=>offsz = offsz + op->type = type + return seq +end +// +//Append one sequence to the end of another +// +def cat_seq(seq1, seq2) +{ + word op + + if not seq1; return seq2; fin + op = seq1 + while op=>nextop; op = op=>nextop; loop + op=>nextop = seq2 + return seq1 +fin +// +//Emit the pending sequence +// +def emit_pending_seq#0 + word lcl_pending, op + // This is called by some of the emit_*() functions to ensure that any + // pending ops are emitted before they emit their own op when they are + // called from the parser. However, this function itself calls some of those + // emit_*() functions to emit instructions from the pending sequence, which + // would cause an infinite loop if we weren't careful. We therefore set + // pending_seq to null on entry and work with a local copy, so if this + // function calls back into itself it is a no-op. + if not pending_seq; return; fin + lcl_pending = pending_seq + pending_seq = NULL + //if outflags & OPTIMIZE + // while crunch_seq(@lcl_pending, 0); loop + // while crunch_seq(@lcl_pending, 1); loop + //fin + while lcl_pending + op = lcl_pending + when op->code + is NEG_CODE + is COMP_CODE + is LOGIC_NOT_CODE + is INC_CODE + is DEC_CODE + is BPTR_CODE + is WPTR_CODE + emit_unaryop(op->code) + break + is MUL_CODE + is DIV_CODE + is MOD_CODE + is ADD_CODE + is SUB_CODE + is SHL_CODE + is SHR_CODE + is AND_CODE + is OR_CODE + is EOR_CODE + is EQ_CODE + is NE_CODE + is GE_CODE + is LT_CODE + is GT_CODE + is LE_CODE + is LOGIC_OR_CODE + is LOGIC_AND_CODE + emit_op(op->code) + break + is CONST_CODE + emit_const(op=>val) + break + is STR_CODE + emit_conststr(op=>val) + break + is LB_CODE + emit_lb() + break + is LW_CODE + emit_lw() + break + is LLB_CODE + emit_llb(op=>offsz) + break + is LLW_CODE + emit_llw(op=>offsz) + break + is LAB_CODE + emit_lab(op=>tag, op=>offsz, op->type) + break + is LAW_CODE + emit_law(op=>tag, op=>offsz, op->type) + break + is SB_CODE + emit_sb() + break + is SW_CODE + emit_sw() + break; + is SLB_CODE + emit_slb(op-=>offsz) + break + is SLW_CODE + emit_slw(op=>offsz) + break + is DLB_CODE + emit_dlb(op=>offsz) + break + is DLW_CODE + emit_dlw(op=>offsz) + break + is SAB_CODE + emit_sab(op=>tag, op=>offsz, op->type) + break + is SAW_CODE + emit_saw(op=>tag, op=>offsz, op->type) + break + is DAB_CODE + emit_dab(op=>tag, op=>offsz, op->type) + break + is DAW_CODE + emit_daw(op=>tag, op=>offsz, op->type) + break + is CALL_CODE + emit_call(op=>tag, op->type) + break + is ICAL_CODE + emit_ical() + break + is LADDR_CODE + emit_localaddr(op=>offsz) + break + is GADDR_CODE + emit_globaladdr(op=>tag, op=>offsz, op->type) + break + is INDEXB_CODE + emit_indexbyte + break + is INDEXW_CODE + emit_indexword + break + is DROP_CODE + emit_drop + break + is DUP_CODE + emit_dup + break + is PUSH_EXP_CODE + emit_push_exp + break + is PULL_EXP_CODE + emit_pull_exp + break + is BRNCH_CODE + emit_brnch(op=>tag) + break + is BRFALSE_CODE + emit_brfls(op=>tag) + break + is BRTRUE_CODE + emit_brtru(op=>tag) + break + is CODETAG_CODE + printf("_B%03d%c\n", op->tag, LBL); + break; + is NOP_CODE + break + otherwise + return + wend + lcl_pending = lcl_pending=>nextop; + // + //Free this op + // + op=>nextop = freeop_lst + freeop_lst = op + loop +end +// +//Emit a sequence of ops (into the pending sequence) +// +def emit_seq(seq)#0 + word op + byte string + string = 0 + op = seq + while op + if op->code == STR_CODE; string = 1; fin + op = op=>nextop + loop + pending_seq = cat_seq(pending_seq, seq) + // The source code comments in the output are much more logical if we don't + // merge multiple sequences together. There's no value in doing this merging + // if we're not optimizing, and we optionally allow it to be prevented even + // when we are optimizing by specifing the -N (NO_COMBINE) flag. + // + // We must also force output if the sequence includes a CS opcode, as the + // associated 'constant' is only temporarily valid. + if !(outflags & OPTIMIZE) or (outflags & NO_COMBINE) or string + emit_pending_seq +end diff --git a/src/toolsrc/lex.pla b/src/toolsrc/lex.pla new file mode 100644 index 0000000..03bf073 --- /dev/null +++ b/src/toolsrc/lex.pla @@ -0,0 +1,331 @@ +// +// Lexical anaylzer +// +def isalpha(c) + if c >= 'A' and c <= 'Z' + return TRUE + elsif c >= 'a' and c <= 'z' + return TRUE + elsif c == '_' + return TRUE + fin + return FALSE +end +def isnum(c) + return c >= '0' and c <= '9' +end +def isalphanum(c) + if c >= 'A' and c <= 'Z' + return TRUE + elsif c >= '0' and c <= '9' + return TRUE + elsif c >= 'a' and c <= 'z' + return TRUE + elsif c == '_' + return TRUE + fin + return FALSE +end +def keymatch + byte i, keypos + word chrptr + + keypos = 0 + while keywrds[keypos] < tknlen + keypos = keypos + keywrds[keypos] + 2 + loop + chrptr = tknptr - 1 + while keywrds[keypos] == tknlen + for i = 1 to tknlen + if toupper(^(chrptr + i)) <> keywrds[keypos + i] + break + fin + next + if i > tknlen + return keywrds[keypos + keywrds[keypos] + 1] + fin + keypos = keypos + keywrds[keypos] + 2 + loop + return ID_TKN +end +def scan + // + // Skip whitespace + // + while ^scanptr == ' ' + scanptr++ + loop + tknptr = scanptr + scanchr = ^scanptr + // + // Scan for token based on first character + // + if isalpha(scanchr) + // + // ID, either variable name or reserved word + // + repeat + scanptr++ + until not isalphanum(^scanptr) + tknlen = scanptr - tknptr + token = keymatch + elsif scanchr >= '0' and scanchr <= '9' // isnum() + // + // Decimal constant + // + token = INT_TKN + constval = 0 + repeat + constval = constval * 10 + ^scanptr - '0' + scanptr++ + until ^scanptr < '0' or ^scanptr > '9' + else + // + // Potential multiple character tokens + // + when scanchr + is '$' + // + // Hexadecimal constant + // + token = INT_TKN + constval = 0 + repeat + scanptr++ + if ^scanptr >= '0' and ^scanptr <= '9' + constval = (constval << 4) + ^scanptr - '0' + elsif ^scanptr >= 'A' and ^scanptr <= 'F' + constval = (constval << 4) + ^scanptr - '7'// 'A'-10 + elsif ^scanptr >= 'a' and ^scanptr <= 'f' + constval = (constval << 4) + ^scanptr - 'W'// 'a'-10 + else + break + fin + until !^scanptr + break + is '\'' + // + // Character constant + // + token = CHR_TKN + scanptr++ + if ^scanptr <> '\\' + constval = ^scanptr + else + scanptr++ + when ^scanptr + is 'n' + constval = $0D; break + is 'r' + constval = $0A; break + is 't' + constval = $09; break + otherwise + constval = ^scanptr + wend + fin + if ^(scanptr + 1) <> '\'' + return parse_err(@bad_cnst) + fin + scanptr = scanptr + 2 + break + is '"' + // + // String constant + // + token = STR_TKN + constval = @strconst+1 + strconst = 0 + scanptr++ + while ^scanptr and ^scanptr <> '"' + if ^scanptr <> '\\' + ^constval = ^scanptr + else + scanptr++ + when ^scanptr + is 'n' + ^constval = $0D; break + is 'r' + ^constval = $0A; break + is 't' + ^constval = $09; break + otherwise + constval = ^scanptr + wend + fin + constval++ + strconst++ + scanptr++ + loop + if !^scanptr; return parse_err(@bad_cnst); fin + constval = @strconst + scanptr++ + break + is '/' + if ^(scanptr + 1) == '/' + token = EOL_TKN + ^scanptr = $00 + else + token = DIV_TKN + scanptr++ + fin + break + is '=' + if ^(scanptr + 1) == '=' + token = EQ_TKN + scanptr = scanptr + 2 + elsif ^(scanptr + 1) == '>' + token = PTRW_TKN + scanptr = scanptr + 2 + else + token = SET_TKN + scanptr++ + fin + break + is '-' + if ^(scanptr + 1) == '>' + token = PTRB_TKN + scanptr = scanptr + 2 + elsif ^(scanptr + 1) == '-' + token = DEC_TKN + scanptr = scanptr + 2 + else + token = SUB_TKN + scanptr++ + fin + break + is '+' + if ^(scanptr + 1) == '+' + token = INC_TKN + scanptr = scanptr + 2 + else + token = ADD_TKN + scanptr++ + fin + break + is '>' + if ^(scanptr + 1) == '>' + token = SHR_TKN + scanptr = scanptr + 2 + elsif ^(scanptr + 1) == '=' + token = GE_TKN + scanptr = scanptr + 2 + else + token = GT_TKN + scanptr++ + fin + break + is '<' + if ^(scanptr + 1) == '<' + token = SHL_TKN + scanptr = scanptr + 2 + elsif ^(scanptr + 1) == '=' + token = LE_TKN + scanptr = scanptr + 2 + elsif ^(scanptr + 1) == '>' + token = NE_TKN + scanptr = scanptr + 2 + else + token = LT_TKN + scanptr++ + fin + break + is ': + if ^(scanptr + 1) == ':' + token = TRIELSE_TKN; + scanptr = scanptr + 2 + else + token = COLON_TKN; + scanptr++ + fin + break + is '?' + if ^(scanptr + 1) == '?' + token = TERNARY_TKN; + scanptr = scanptr + 2 + else + token = TERNARY_TKN; + scanptr++ + fin + break + is 0 + is ';' + if token <> EOF_TKN + token = EOL_TKN + fin + break + otherwise + // + // Simple single character tokens + // + token = scanchr | $80 + scanptr++ + wend + fin + tknlen = scanptr - tknptr + return token +end +def rewind(ptr)#0 + scanptr = ptr +end +def lookahead + word backptr, backtkn + byte prevtkn, prevlen, look + backptr = scanptr + backtkn = tknptr + prevtkn = token + prevlen = tknlen + look = scan + scanptr = backptr + tknptr = backtkn + token = prevtkn + tknlen = prevlen + return look +end +// +// Get next line of input +// +def nextln + if ^scanptr == ';' + scanptr++ + scan + else + if token <> EOL_TKN or token <> EOF_TKN; return parse_err("Extraneous characters"); fin + scanptr = inbuff + instr = fileio:read(refnum, inbuff, 127) + if instr + ^(inbuff + instr + 1) = 0 // NULL terminate string + lineno++ + if !(lineno & $0F); putc('.'); fin + if scan == INCLUDE_TKN + if incref; return parse_err("Nested INCLUDEs not allowed"); fin + if scan <> STRING_TKN; return parse_err("Missing INCLUDE file"); fin + incfile = scanptr - constval + memcpy(@incfile + 1, constval, incfile) + sysincbuf = heapallocalign(1024, 256) + incref = fileio:opensys(@incfile, sysincbuf) + if not incref; return parse_err("Unable to open INCLUDE file"); fin + fileio:newline(incref, $7F, $0D) + refnum = incref + parsefile = @incfile + srcline = lineno + lineno = 0 + return nextln + fin + else + if refnum == incref + fileio:close(incnum) + heaprelease(sysincbuf) + incref = 0 + refnum = srcref + parsefile = srcfile + lineno = srcline + return nextln + else + *instr = 0 + token = EOF_TKN + fin + fin + fin + return token +end diff --git a/src/toolsrc/parse.pla b/src/toolsrc/parse.pla new file mode 100644 index 0000000..591f61e --- /dev/null +++ b/src/toolsrc/parse.pla @@ -0,0 +1,1249 @@ +// +// Alebraic op to stack op +// +def push_op(op, prec)#0 + opsp++ + if opsp == 16; parse_err("Op stack overflow"); return; fin + opstack[opsp] = op + precstack[opsp] = prec +end +def pop_op + if opsp < 0; return parse_err("Op stack underflow"); fin + opsp-- + return opstack[opsp + 1] +end +def tos_op + if opsp < 0 + return 0 + fin + return opstack[opsp] +end +def tos_op_prec(tos) + if opsp <= tos + return 100 + fin + return precstack[opsp] +end +def push_val(value, size, type)#0 + valsp++ + if valsp == 16; parse_err("Eval stack overflow"); return; fin + valstack[valsp] = value + sizestack[valsp] = size + typestack[valsp] = type +end +def pop_val(valptr, sizeptr, typeptr) + if valsp < 0; return parse_err("Eval stack underflow"); fin + *valptr = valstack[valsp] + ^sizeptr = sizestack[valsp] + ^typeptr = typestack[valsp] + valsp-- + return valsp + 1 +end +// +// Constant expression parsing +// +def calc_binaryop(op) + word val1, val2 + byte size1, size2, type1, type2 + + if not pop_val(@val2, @size2, @type2); return 0; fin + pop_val(@val1, @size1, @type1) + if type1 <> CONST_TYPE and type2 <> CONST_TYPE; return parse_err(@bad_cnst); fin + when op + is MUL_TKN + val1 = val1 * val2 + break + is DIV_TKN + val1 = val1 / val2 + break + is MOD_TKN + val1 = val1 % val2 + break + is ADD_TKN + val1 = val1 + val2 + break + is SUB_TKN + val1 = val1 - val2 + break + is SHL_TKN + val1 = val1 << val2 + break + is SHR_TKN + val1 = val1 >> val2 + break + is AND_TKN + val1 = val1 & val2 + break + is OR_TKN + val1 = val1 | val2 + break + is EOR_TKN + val1 = val1 ^ val2 + break + otherwise + return FALSE + wend + if size2 > size1; size1 = size2; fin + push_val(val1, size1, type1) + return TRUE +end +def parse_constterm(valptr, sizeptr) + word type + + when scan + is OPEN_PAREN_TKN + type = parse_constexpr(valptr, sizeptr) + if token <> CLOSE_PAREN_TKN; return parse_err(@no_close_paren); fin + return type + is ID_TKN + is INT_TKN + is CHR_TKN + is STR_TKN + return token + wend + return FALSE +end +def parse_constval + byte mod, type, size + word idptr, ctag, value + + value = 0 + size = 1 + mod = 0 + repeat + type = parse_constterm(@value, @size) + if !type + when token + is SUB_TKN + mod = mod | 1; break + is ALT_COMP_TKN + is COMP_TKN + mod = mod | 2; break + is LOGIC_NOT_TKN + mod = mod | 4; break + is AT_TKN + mod = mod | 8; break + is ADD_TKN + break + otherwise + return 0 + wend + fin + until type + when token + is CLOSE_PAREN_TKN + break + is STR_TKN + size = tknlen - 1 + value = constval + type = STR_TYPE + if mod; return parse_err(@bad_op); fin + break + is CHR_TKN + size = 1 + value = constval + type = CONST_TYPE + break + is INT_TKN + size = 2 + value = constval + type = CONST_TYPE + break + is ID_TKN + size = 2 + idptr = id_lookup(tknptr, tknlen) + if !idptr; return parse_err(@bad_cnst); fin + type = idptr->idtype + if type & ADDR_TYPE + if mod <> 8; return parse_err(@bad_cnst); fin + type = CONSTADDR_TYPE + fin + value = idptr=>idval + break + otherwise + return 0 + wend + if mod & 1 + value = -value + fin + if mod & 2 + value = ~value + fin + if mod & 4 + value = !value + fin + push_val(value, size, type) + return type +end +def parse_constexpr(valptr, sizeptr) + byte prevmatch, matchop, i, type + word optos + + *valptr = 0 + ^sizeptr = 1 + matchop = 0 + optos = opsp + repeat + prevmatch = matchop + matchop = 0 + if parse_constval + matchop = 1 + scan + for i = 0 to bops_tblsz + if token == bops_tbl[i] + matchop = 2 + if bops_prec[i] >= tos_op_prec(optos) + if !calc_binaryop(pop_op); return parse_err(@bad_op); fin + fin + push_op(token, bops_prec[i]) + break + fin + next + fin + until matchop <> 2 + if matchop == 0 and prevmatch == 0; return 0; fin + if matchop == 0 and prevmatch == 2; return parse_err(@missing_op); fin + while optos < opsp + if !calc_binaryop(pop_op); return parse_err(@bad_op); fin + loop + pop_val(valptr, sizeptr, @type) + return type +end +def parse_const(valptr) + word idptr + + when scan + is CHR_TKN + is INT_TKN + *valptr = constval + break + is ID_TKN + idptr = id_lookup(tknptr, tknlen) + if !idptr; return parse_err(@bad_cnst); fin + if idptr->idtype & CONST_TYPE + *valptr = idptr=>idval + break + fin + otherwise + return 0 + wend + return CONST_TYPE +end +// +// Normal expression parsing +// +def ispostop + when scan + is OPEN_PAREN_TKN + is OPEN_BRACKET_TKN + is DOT_TKN + is COLON_TKN + is PTRB_TKN + is PTRW_TKN + return TRUE + wend + return FALSE +end +def parse_term + when scan + is OPEN_PAREN_TKN + if !parse_expr + return FALSE + fin + if token <> CLOSE_PAREN_TKN; return parse_err(@no_close_paren); fin + is ID_TKN + is INT_TKN + is CHR_TKN + is STR_TKN + return TRUE + wend + return FALSE +end +def parse_value(rvalue) + byte cparams, deref, type, emit_val + word optos, idptr, value + byte const_size, ref_type + word ref_offset, const_offset + + deref = rvalue + optos = opsp + type = 0 + emit_val = FALSE + value = 0 + + // + // Parse pre-ops + // + while !parse_term + when token + is ADD_TKN + break + is BPTR_TKN + if deref + push_op(token, 0) + else + deref++ + type = type | BPTR_TYPE + fin + break + is WPTR_TKN + if deref + push_op(token, 0) + else + deref++ + type = type | WPTR_TYPE + fin + break + is AT_TKN + deref-- + break + is SUB_TKN + is COMP_TKN + is LOGIC_NOT_TKN + push_op(token, 0) + break + otherwise + return 0 + wend + loop + // + // Determine terminal type + // + when token + is INT_TKN + is CHR_TKN + value = constval + type = type | CONST_TYPE + break + is ID_TKN + idptr = id_lookup(tknptr, tknlen) + if !idptr; return 0; fin + if !(idptr->idtype); return 0; fin + type = type | idptr->idtype + value = idptr=>idval + break + is CLOSE_PAREN_TKN + // type = type | WORD_TYPE + emit_val = TRUE + break + is STR_TKN + // + // Special case + // + emit_constr(constval, tknlen - 1) + scan + return WORD_TYPE + break + otherwise + return 0 + wend + // + // Constant optimizations + // + if type & CONST_TYPE + cparams = TRUE + while optos < opsp and cparams + when tos_op + is NEG_TKN + pop_op + value = -value + break + is ALT_COMP_TKN + is COMP_TKN + pop_op + value = ~value + break + is LOGIC_NOT_TKN + pop_op + value = !value + break + otherwise + cparams = FALSE + wend + loop + fin + // + // Parse post-ops + // + ref_type = type & ~PTR_TYPE + ref_offset = 0 + while ispostop + when token + is OPEN_PAREN_TKN + // + // Function call + // + if emit_val + if ref_offset <> 0 + emit_const(ref_offset) + emit_op($02) + ref_offset = 0 + fin + if ref_type & BPTR_TYPE; emit_lb + elsif ref_type & WPTR_TYPE; emit_lw + fin + if lookahead <> CLOSE_PAREN_TKN + emit_push + fin + fin + cparams = 0 + while parse_expr + cparams = cparams + 1 + if token <> COMMA_TKN + break + fin + loop + if token <> CLOSE_PAREN_TKN + return parse_err(@no_close_paren) + fin + if ref_type & FUNC_CONST_TYPE + emit_call(value) + else + if !emit_val + if ref_type & CONST_TYPE + emit_const(value) + elsif ref_type & VAR_TYPE + if type & LOCAL_TYPE + emit_llw(value + ref_offset) + else + emit_law(value, ref_offset) + fin + ref_offset = 0 + fin + else + if cparams + emit_pull + fin + fin + emit_ical + fin + emit_val = TRUE + ref_type = 0 + break + is OPEN_BRACKET_TKN + // + // Array of arrays + // + if !emit_val + if type & CONST_TYPE + emit_const(value) + elsif type & ADDR_TYPE + if type & LOCAL_TYPE + emit_localaddr(value + ref_offset) + else + emit_globaladdr(value, ref_offset) + fin + ref_offset = 0 + fin + emit_val = TRUE + else + if ref_offset <> 0 + emit_const(ref_offset) + emit_op($02) + ref_offset = 0 + fin + fin + while parse_expr + if token <> COMMA_TKN + break + fin + emit_indexword + emit_lw + loop + if token <> CLOSE_BRACKET_TKN + return parse_err(@no_close_bracket) + fin + if ref_type & (WPTR_TYPE | WORD_TYPE) + emit_indexword + ref_type = WPTR_TYPE + else + emit_indexbyte + ref_type = BPTR_TYPE + fin + break + is PTRB_TKN + is PTRW_TKN + // + // Structure member pointer + // + if !emit_val + if (type & CONST_TYPE) + emit_const(value) + elsif type & ADDR_TYPE + if type & LOCAL_TYPE + if ref_type & BYTE_TYPE + emit_llb(value + ref_offset) + else + emit_llw(value + ref_offset) + fin + else + if ref_type & BYTE_TYPE + emit_lab(value, ref_offset) + else + emit_law(value, ref_offset) + fin + fin + fin + emit_val = 1; + else + if ref_offset <> 0 + emit_const(ref_offset) + emit_op($02) + ref_offset = 0 + fin + if ref_type & BPTR_TYPE; emit_lb + elsif ref_type & WPTR_TYPE; emit_lw; fin + fin + if token == PTRB_TKN + ref_type = BPTR_TYPE + else + ref_type = WPTR_TYPE + fin + ref_offset = 0 + if !parse_const(@ref_offset) + rewind(tknptr) + fin + if ref_offset <> 0 + emit_const(ref_offset) + emit_op($02) + ref_offset = 0 + fin + break + is DOT_TKN + is COLON_TKN + // + // Structure member offset + // + if ref_type & (VAR_TYPE | CONST_TYPE) + if token == DOT_TKN + ref_type = BYTE_TYPE + else + ref_type = WORD_TYPE + fin + else + if token == DOT_TKN + ref_type = BPTR_TYPE + else + ref_type = WPTR_TYPE + fin + fin + if parse_const(@const_offset) + ref_offset = ref_offset + const_offset + else + rewind(tknptr) + fin + if !emit_val + if type & CONST_TYPE + value = value + ref_offset + ref_offset = 0 + elsif type & FUNC_TYPE + emit_globaladdr(value, ref_offset) + ref_offset = 0 + emit_val = TRUE + fin + fin + break + wend + loop + if emit_val + if ref_offset <> 0 + emit_const(ref_offset) + emit_op($02) + ref_offset = 0 + fin + if deref + if ref_type & BPTR_TYPE + emit_lb + elsif ref_type & WPTR_TYPE + emit_lw + fin + fin + else // emit_val + if deref + if ref_type & CONST_TYPE + emit_const(value) + if ref_type & VAR_TYPE + if ref_type & BYTE_TYPE + emit_lb() + else + emit_lw() + fin + fin + elsif ref_type & FUNC_TYPE + emit_call(value) + elsif ref_type & VAR_TYPE + if type & LOCAL_TYPE + if ref_type & BYTE_TYPE + emit_llb(value + ref_offset) + else + emit_llw(value + ref_offset) + fin + else + if ref_type & BYTE_TYPE + emit_lab(value, ref_offset) + else + emit_law(value, ref_offset) + fin + fin + fin + else + if type & CONST_TYPE + emit_const(value) + elsif type & ADDR_TYPE + if type & LOCAL_TYPE + emit_localaddr(value + ref_offset) + else + emit_globaladdr(value, ref_offset) + fin + fin + fin + fin // emit_val + while optos < opsp + if !emit_unaryop(pop_op); return parse_err(@bad_op); fin + loop + if type & PTR_TYPE + ref_type = type + fin + if !ref_type + ref_type = WORD_TYPE + fin + return ref_type +end +def parse_expr + byte prevmatch, matchop, i + word optos + + matchop = 0 + optos = opsp + repeat + prevmatch = matchop + matchop = 0 + if parse_value(1) + matchop = 1 + for i = 0 to bops_tblsz + if token == bops_tbl[i] + matchop = 2 + if bops_prec[i] >= tos_op_prec(optos) + if !emit_binaryop(pop_op); return parse_err(@bad_op); fin + fin + push_op(token, bops_prec[i]) + break + fin + next + fin + until matchop <> 2 + if matchop == 0 and prevmatch == 2; return parse_err(@missing_op); fin + while optos < opsp + if !emit_binaryop(pop_op); return parse_err(@bad_op); fin + loop + return matchop or prevmatch +end +def parse_stmnt + byte type, elem_type, elem_size, i + word elem_offset, tag_prevbrk, tag_prevcnt, tag_else, tag_endif, tag_while, tag_wend + word tag_repeat, tag_for, tag_choice, tag_of, idptr, saveptr, addr, stepdir + + if token <> END_TKN and token <> DONE_TKN and token <> OF_TKN and token <> DEFAULT_TKN + prevstmnt = token + fin + when token + is IF_TKN + if !parse_expr; return 0; fin + tag_else = ctag_new + tag_endif = ctag_new + emit_brfls(tag_else) + scan + repeat + while parse_stmnt + nextln + loop + if token <> ELSEIF_TKN + break + fin + emit_branch(tag_endif) + ctag_resolve(tag_else) + if !parse_expr; return FALSE; fin + tag_else = ctag_new + emit_brfls(tag_else) + until FALSE + if token == ELSE_TKN + emit_branch(tag_endif) + ctag_resolve(tag_else) + scan + while parse_stmnt + nextln + loop + ctag_resolve(tag_endif) + else + ctag_resolve(tag_else) + ctag_resolve(tag_endif) + fin + if token <> FIN_TKN; return parse_err(@no_fin); fin + break + is WHILE_TKN + tag_while = ctag_new + tag_wend = ctag_new + tag_prevcnt = cont_tag + cont_tag = tag_while + tag_prevbrk = break_tag + break_tag = tag_wend + ctag_resolve(tag_while) + if !parse_expr; return FALSE; fin + emit_brfls(tag_wend) + while parse_stmnt + nextln + loop + if token <> LOOP_TKN; return parse_err(@no_loop); fin + emit_branch(tag_while) + ctag_resolve(tag_wend) + break_tag = tag_prevbrk + cont_tag = tag_prevcnt + break + is REPEAT_TKN + tag_repeat = ctag_new + tag_prevbrk = break_tag + break_tag = ctag_new + tag_prevcnt = cont_tag + cont_tag = ctag_new + ctag_resolve(tag_repeat) + scan + while parse_stmnt + nextln + loop + if token <> UNTIL_TKN; return parse_err(@no_until); fin + ctag_resolve(cont_tag) + cont_tag = tag_prevcnt + if !parse_expr; return FALSE; fin + emit_brfls(tag_repeat) + ctag_resolve(break_tag) + break_tag = tag_prevbrk + break + is FOR_TKN + stack_loop = stack_loop + 1 + tag_for = ctag_new + tag_prevcnt = cont_tag + cont_tag = tag_for + tag_prevbrk = break_tag + break_tag = ctag_new + if scan <> ID_TKN; return parse_err(@bad_stmnt); fin + idptr = id_lookup(tknptr, tknlen) + if idptr + type = idptr->idtype + addr = idptr=>idval + else + return FALSE + fin + if scan <> SET_TKN; return parse_err(@bad_stmnt); fin + if !parse_expr; return parse_err(@bad_stmnt); fin + ctag_resolve(tag_for) + if type & LOCAL_TYPE + if type & BYTE_TYPE + emit_dlb(addr) + else + emit_dlw(addr) + fin + else + if type & BYTE_TYPE + emit_dab(addr, 0) + else + emit_daw(addr, 0) + fin + fin + if token == TO_TKN + stepdir = 1 + elsif token == DOWNTO_TKN + stepdir = -1 + else + return parse_err(@bad_stmnt) + fin + if !parse_expr; return parse_err(@bad_stmnt); fin + if stepdir > 0 + emit_brgt(break_tag) + else + emit_brlt(break_tag) + fin + if token == STEP_TKN + if !parse_expr; return parse_err(@bad_stmnt); fin + if stepdir > 0 + emit_binaryop(ADD_TKN) + else + emit_binaryop(SUB_TKN) + fin + else + if stepdir > 0 + emit_unaryop(INC_TKN) + else + emit_unaryop(DEC_TKN) + fin + fin + while parse_stmnt + nextln + loop + if token <> NEXT_TKN; return parse_err(@bad_stmnt); fin + emit_branch(tag_for) + cont_tag = tag_prevcnt + ctag_resolve(break_tag) + emit_drop + break_tag = tag_prevbrk + stack_loop = stack_loop - 1 + break + is CASE_TKN + stack_loop = stack_loop + 1 + tag_prevbrk = break_tag + break_tag = ctag_new + tag_choice = ctag_new + tag_of = ctag_new + if !parse_expr; return parse_err(@bad_stmnt); fin + nextln + while token <> ENDCASE_TKN + when token + is OF_TKN + if !parse_expr; return parse_err(@bad_stmnt); fin + emit_brne(tag_choice) + ctag_resolve(tag_of) + while parse_stmnt + nextln + loop + tag_of = ctag_new + if prevstmnt <> BREAK_TKN // Fall through to next OF if no break + emit_branch(tag_of) + fin + ctag_resolve(tag_choice) + tag_choice = ctag_new + break + is DEFAULT_TKN + ctag_resolve(tag_of) + tag_of = 0 + scan + while parse_stmnt + nextln + loop + if token <> ENDCASE_TKN; return parse_err(@bad_stmnt); fin + break + is EOL_TKN + nextln + break + otherwise + return parse_err(@bad_stmnt) + wend + loop + if (tag_of) + ctag_resolve(tag_of) + fin + ctag_resolve(break_tag) + emit_drop + break_tag = tag_prevbrk + stack_loop = stack_loop - 1 + break + is BREAK_TKN + if break_tag + emit_branch(break_tag) + else + return parse_err(@bad_stmnt) + fin + break + is CONT_TKN + if cont_tag + emit_branch(cont_tag) + else + return parse_err(@bad_stmnt) + fin + break + is RETURN_TKN + if infunc + for i = 1 to stack_loop + emit_drop + next + fin + if !parse_expr + emit_const(0) + fin + emit_leave + break + is EOL_TKN + return TRUE + is ELSE_TKN + is ELSEIF_TKN + is FIN_TKN + is LOOP_TKN + is UNTIL_TKN + is NEXT_TKN + is OF_TKN + is DEFAULT_TKN + is ENDCASE_TKN + is END_TKN + is DONE_TKN + is DEF_TKN + return FALSE + is ID_TKN + saveptr = tknptr + idptr = id_lookup(tknptr, tknlen) + if !idptr; return FALSE; fin + type = idptr->idtype + addr = idptr=>idval + if type & VAR_TYPE + elem_type = type + elem_offset = 0 + if scan == DOT_TKN or token == COLON_TKN + // + // Structure member offset + // + if token == DOT_TKN + elem_type = BYTE_TYPE + else + elem_type = WORD_TYPE + fin + if !parse_const(@elem_offset) + token = ID_TKN + else + scan + fin + fin + if token == SET_TKN + if !parse_expr; return parse_err(@bad_expr); fin + if type & LOCAL_TYPE + if elem_type & BYTE_TYPE + emit_slb(addr + elem_offset) + else + emit_slw(addr + elem_offset) + fin + else + if elem_type & BYTE_TYPE + emit_sab(addr, elem_offset) + else + emit_saw(addr, elem_offset) + fin + fin + break + elsif token == INC_TKN or token == DEC_TKN + if type & LOCAL_TYPE + if elem_type & BYTE_TYPE + emit_llb(addr + elem_offset) + emit_unaryop(token) + emit_slb(addr + elem_offset) + else + emit_llw(addr + elem_offset) + emit_unaryop(token) + emit_slw(addr + elem_offset) + fin + else + if elem_type & BYTE_TYPE + emit_lab(addr, elem_offset) + emit_unaryop(token) + emit_sab(addr, elem_offset) + else + emit_law(addr, elem_offset) + emit_unaryop(token) + emit_saw(addr, elem_offset) + fin + fin + break + fin + elsif type & FUNC_TYPE + if scan == EOL_TKN + emit_call(addr) + emit_drop + break + fin + fin + tknptr = saveptr + otherwise + rewind(tknptr) + type = parse_value(0) + if type + if token == SET_TKN + if !parse_expr; return parse_err(@bad_expr); fin + if type & XBYTE_TYPE + emit_sb + else + emit_sw + fin + elsif token == INC_TKN or token == DEC_TKN + emit_dup + if type & XBYTE_TYPE + emit_lb + emit_unaryop(token) + emit_sb + else + emit_lw + emit_unaryop(token) + emit_sw + fin + else + if type & BPTR_TYPE + emit_lb + elsif type & WPTR_TYPE + emit_lw + fin + emit_drop + fin + else + return parse_err(@bad_syntax) + fin + wend + if scan <> EOL_TKN + return parse_err(@bad_syntax) + fin + return TRUE +end +def parse_var(type) + byte consttype, constsize, idlen + word idptr, constval, arraysize, size + + //cout('T') + idlen = 0 + size = 1 + if scan == OPEN_BRACKET_TKN + size = 0 + parse_constexpr(@size, @constsize) + if token <> CLOSE_BRACKET_TKN; return parse_err(@no_close_bracket); fin + scan + fin + if token == ID_TKN + idptr = tknptr + idlen = tknlen + if scan == OPEN_BRACKET_TKN + size = 0 + parse_constexpr(@size, @constsize) + if token <> CLOSE_BRACKET_TKN; return parse_err(@no_close_bracket); fin + scan + fin + fin + if type == WORD_TYPE + size = size * 2 + fin + if token == SET_TKN + if infunc; return parse_err(@no_local_init); fin + if idlen + iddata_add(idptr, idlen, type, 0) + fin + consttype = parse_constexpr(@constval, @constsize) + if consttype + arraysize = emit_data(type, consttype, constval, constsize) + while token == COMMA_TKN + consttype = parse_constexpr(@constval, @constsize) + if consttype + arraysize = arraysize + emit_data(type, consttype, constval, constsize) + else + return parse_err(@bad_decl) + fin + loop + iddata_size(PTR_TYPE, size, arraysize) + else + return parse_err(@bad_decl) + fin + elsif idlen + if infunc + idlocal_add(idptr, idlen, type, size) + else + iddata_add(idptr, idlen, type, size) + fin + fin + return TRUE +end +def parse_struc + byte strucid[16] + byte type, idlen, struclen, constsize + word size, offset, idstr + + //cout('S') + struclen = 0 + if scan == ID_TKN + struclen = tknlen + if struclen > 16 + struclen = 16 + fin + for idlen = 0 to struclen + strucid[idlen] = ^(tknptr + idlen) + next + fin + offset = 0 + while nextln == BYTE_TKN or token == WORD_TKN + size = 1 + if token == BYTE_TKN + type = BYTE_TYPE + else + type = WORD_TYPE + fin + if scan == OPEN_BRACKET_TKN + size = 0 + parse_constexpr(@size, @constsize) + if token <> CLOSE_BRACKET_TKN; return parse_err(@no_close_bracket); fin + scan + fin + repeat + idlen = 0; + if token == ID_TKN + idstr = tknptr + idlen = tknlen + if scan == OPEN_BRACKET_TKN + size = 0 + parse_constexpr(@size, @constsize) + if token <> CLOSE_BRACKET_TKN; return parse_err(@no_close_bracket); fin + scan + fin + fin + if type & WORD_TYPE + size = size * 2 + fin + if idlen + idconst_add(idstr, idlen, offset) + fin + offset = offset + size + until token <> COMMA_TKN + if token <> EOL_TKN; return FALSE; fin + loop + if struclen + idconst_add(@strucid, struclen, offset) + fin + return token == END_TKN +end +def parse_vars + byte idlen, type, size + word value, idptr + + //cout('V') + when token + is CONST_TKN + if scan <> ID_TKN + return parse_err(@bad_cnst) + fin + idptr = tknptr + idlen = tknlen + if scan <> SET_TKN + return parse_err(@bad_cnst) + fin + if !parse_constexpr(@value, @size) + return parse_err(@bad_cnst) + fin + idconst_add(idptr, idlen, value) + break + is STRUC_TKN + if !parse_struc; parse_err(@bad_struc); fin + break + is BYTE_TKN + is WORD_TKN + if token == BYTE_TKN + type = BYTE_TYPE + else + type = WORD_TYPE + fin + repeat + if !parse_var(type) + return FALSE + fin + until token <> COMMA_TKN + break + is PREDEF_TKN + repeat + if scan == ID_TKN + idfunc_add(tknptr, tknlen, ctag_new) + else + return parse_err(@bad_decl) + fin + until scan <> COMMA_TKN + break + is EOL_TKN + break + otherwise + return FALSE + wend + return TRUE +end +def parse_defs + byte cfnparms + word func_tag, idptr + + if token == DEF_TKN + //cout('D') + if scan <> ID_TKN; return parse_err(@bad_decl); fin + cfnparms = 0 + infunc = TRUE + idptr = idglobal_lookup(tknptr, tknlen) + if idptr + func_tag = idptr=>idval + else + func_tag = ctag_new + idfunc_add(tknptr, tknlen, func_tag) + fin + ctag_resolve(func_tag) + retfunc_tag = ctag_new + idlocal_init + if scan == OPEN_PAREN_TKN + repeat + if scan == ID_TKN + cfnparms = cfnparms + 1 + idlocal_add(tknptr, tknlen, WORD_TYPE, 2) + scan + fin + until token <> COMMA_TKN + if token <> CLOSE_PAREN_TKN + return parse_err(@bad_decl) + fin + scan + fin + while parse_vars + nextln + loop + emit_enter(cfnparms) + prevstmnt = 0 + while parse_stmnt + nextln + loop + infunc = FALSE + if token <> END_TKN; return parse_err(@bad_syntax); fin + if scan <> EOL_TKN; return parse_err(@bad_syntax); fin + if prevstmnt <> RETURN_TKN + emit_const(0) + emit_leave + fin + return TRUE + elsif token == EOL_TKN + return TRUE + fin + return FALSE +end +def parse_module + idglobal_init + idlocal_init + srcref = fileio:open(@srcfile) + if srcref + fileio:newline(srcref, $7F, $0D) + refnum = srcref + parsefile = @srcfile + if nextln + // + // Compile module + // + while parse_vars + nextln + loop + while parse_defs + nextln + loop + framesize = 0 + entrypoint = codeptr + emit_enter(0) + prevstmnt = 0 + if token <> DONE_TKN + while parse_stmnt + nextln + loop + fin + if prevstmnt <> RETURN_TKN + emit_const(0) + emit_leave + fin + fileio:close(srcref) + //dumpsym(idglobal_tbl, globals) + // + // Write REL file + // + + return not parserr + fin + else + puts("Error opening: "); puts(@srcfile); putln + fin + return FALSE +end diff --git a/src/toolsrc/plasm.pla b/src/toolsrc/plasm.pla new file mode 100644 index 0000000..d2519ec --- /dev/null +++ b/src/toolsrc/plasm.pla @@ -0,0 +1,340 @@ +// +// Data and text buffer constants +// +const machid = $BF98 +const iobuffer = $0800 +const databuff = $0C00 +const codebuff = $A900 +const codebuffsz = $1000 +// +// Compiler variables +// +// +// Tokens +// +const ID_TKN = $D6 // V +const CHR_TKN = $C3 // C +const INT_TKN = $C9 // I +const STR_TKN = $D3 // S +const EOL_TKN = $02 +const EOF_TKN = $01 +const ERR_TKN = $00 +// +//Ternary operand operators +// +const TERNARY_TOKEN = $BF // ? +const TRIELSE_TOKEN = $DF // _ +// +// Binary operand operators +// +const SET_TKN = $BD // = +const ADD_TKN = $AB // + +const SUB_TKN = $AD // - +const MUL_TKN = $AA // * +const DIV_TKN = $AF // / +const MOD_TKN = $A5 // % +const OR_TKN = $FC // | +const EOR_TKN = $DE // ^ +const AND_TKN = $A6 // & +const SHR_TKN = $D2 // R +const SHL_TKN = $CC // L +const GT_TKN = $BE // > +const GE_TKN = $C8 // H +const LT_TKN = $BC // < +const LE_TKN = $C2 // B +const NE_TKN = $D5 // U +const EQ_TKN = $C5 // E +const LOGIC_AND_TKN = $CE // N +const LOGIC_OR_TKN = $CF // O +// +// Unary operand operators +// +const AT_TKN = $C0 // @ +const DOT_TKN = $AE // . +const COLON_TKN = $BA // : +const NEG_TKN = $AD // - +const POUND_TKN = $A3 // # +const COMP_TKN = $FE // ~ +const LOGIC_NOT_TKN = $A1 // ! +const BPTR_TKN = $DE // ^ +const WPTR_TKN = $AA // * +const PTRB_TKN = $D8 // X +const PTRW_TKN = $D7 // W +const INC_TKN = $C1 // A +const DEC_TKN = $C4 // D +// +// Enclosure tokens +// +const OPEN_PAREN_TKN = $A8 // ( +const CLOSE_PAREN_TKN = $A9 // ) +const OPEN_BRACKET_TKN = $DB // [ +const CLOSE_BRACKET_TKN = $DD // ] +// +// Misc. tokens +// +const COMMA_TKN = $AC // , +//const COMMENT_TKN = $BB // // +// +// Keyword tokens +// +const CONST_TKN = $80 +const BYTE_TKN = $81 +const WORD_TKN = $82 +const IF_TKN = $83 +const ELSEIF_TKN = $84 +const ELSE_TKN = $85 +const FIN_TKN = $86 +const END_TKN = $87 +const WHILE_TKN = $88 +const LOOP_TKN = $89 +const CASE_TKN = $8A +const OF_TKN = $8B +const DEFAULT_TKN = $8C +const ENDCASE_TKN = $8D +const FOR_TKN = $8E +const TO_TKN = $8F +const DOWNTO_TKN = $90 +const STEP_TKN = $91 +const NEXT_TKN = $92 +const REPEAT_TKN = $93 +const UNTIL_TKN = $94 +const DEF_TKN = $95 +const STRUC_TKN = $96 +const SYSFLAGS_TKN = $97 +const DONE_TKN = $98 +const RETURN_TKN = $99 +const BREAK_TKN = $9A +const CONT_TKN = $9B +const PREDEF_TKN = $9C +const IMPORT_TKN = $9D +const EXPORT_TKN = $9E +const INCLUDE_TKN = $9F +// +// Types +// +const CONST_TYPE = $01 +const BYTE_TYPE = $02 +const WORD_TYPE = $04 +const VAR_TYPE = $06 // (WORD_TYPE | BYTE_TYPE) +const FUNC_TYPE = $08 +const FUNC_CONST_TYPE = $09 +const ADDR_TYPE = $0E // (VAR_TYPE | FUNC_TYPE) +const LOCAL_TYPE = $10 +const BPTR_TYPE = $20 +const WPTR_TYPE = $40 +const PTR_TYPE = $60 // (BPTR_TYPE | WPTR_TYPE) +const XBYTE_TYPE = $22 // (BPTR_TYPE | BYTE_TYPE) +const XWORD_TYPE = $44 // (WPTR_TYPE | WORD_TYPE) +const CONSTADDR_TYPE = $61 // (CONST_TYPE | PTR_TYPE) +const STR_TYPE = $80 +// +// Keywords +// +byte keywrds = "IF", IF_TKN +byte = "TO", TO_TKN +byte = "IS", OF_TKN +byte = "OR", LOGIC_OR_TKN +byte = "FOR", FOR_TKN +byte = "FIN", FIN_TKN +byte = "DEF", DEF_TKN +byte = "END", END_TKN +byte = "AND", LOGIC_AND_TKN +byte = "NOT", LOGIC_NOT_TKN +byte = "BYTE", BYTE_TKN +byte = "WORD", WORD_TKN +byte = "ELSE", ELSE_TKN +byte = "NEXT", NEXT_TKN +byte = "WHEN", CASE_TKN +byte = "LOOP", LOOP_TKN +byte = "STEP", STEP_TKN +byte = "DONE", DONE_TKN +byte = "WEND", ENDCASE_TKN +byte = "CONST", CONST_TKN +byte = "STRUC", STRUC_TKN +byte = "ELSIF", ELSEIF_TKN +byte = "WHILE", WHILE_TKN +byte = "UNTIL", UNTIL_TKN +byte = "BREAK", BREAK_TKN +byte = "IMPORT", IMPORT_TKN +byte = "EXPORT", EXPORT_TKN +byte = "DOWNTO", DOWNTO_TKN +byte = "REPEAT", REPEAT_TKN +byte = "RETURN", RETURN_TKN +byte = "PREDEF", PREDEF_TKN +byte = "INCLUDE", INCLUDE_TKN +byte = "CONTINUE", CONT_TKN +byte = "SYSFLAGS", SYSFLAGS_TKN +byte = "OTHERWISE",DEFAULT_TKN +byte = $FF +// +// Mathematical ops +// +const bops_tblsz = 17 // minus 1 +byte[] bops_tbl // Highest precedence +byte = MUL_TKN, DIV_TKN, MOD_TKN +byte = ADD_TKN, SUB_TKN +byte = SHR_TKN, SHL_TKN +byte = AND_TKN +byte = EOR_TKN +byte = OR_TKN +byte = GT_TKN, GE_TKN, LT_TKN, LE_TKN +byte = EQ_TKN, NE_TKN +byte = LOGIC_AND_TKN +byte = LOGIC_OR_TKN + // Lowest precedence +byte[] bops_prec // Highest precedence +byte = 1, 1, 1 +byte = 2, 2 +byte = 3, 3 +byte = 4 +byte = 5 +byte = 6 +byte = 7, 7, 7, 7 +byte = 8, 8 +byte = 9 +byte = 10 + // Lowest precedence +byte[16] opstack +byte[16] precstack +word opsp = -1 +word[16] valstack +byte[16] sizestack +byte[16] typestack +word valsp = -1 +// +// Symbol table variables +// +struc t_opseq + byte code + word val + word tag + word offsz + byte type + word nextop +end +const OPSEQNUM = 200 +const idglobal_tblsz = 2048 +const idlocal_tblsz = 512 +const idglobal_tbl = $1600 +const idlocal_tbl = $1E00 +const ctag_max = 1024 +const ctag_tbl = $800 +const idval = 0 +const idtype = 2 +const idname = 3 +const idrecsz = 4 +word globals = 0 +word datasize = 0 +word lastglobal +byte locals = 0 +word framesize = 0 +word lastlocal +const IS_RESOLVED = $8000 +const IS_RELATIVE = $8000 +const IS_CTAG = $8000 +const MASK_CTAG = $7FFF +word codetag = -1 +word codeptr, entrypoint = 0 +word modsysflags = 0 +byte lastop = $FF +// +// Scanner variables +// +const inbuff = $0200 +const instr = $01FF +word scanptr = @nullstr +byte scanchr, token, tknlen +byte parserrpos, parserr = 0 +word tknptr, parserrln +word constval +word lineno = 0 +// +// Compiler output messages +// +byte dup_id[] = "DUPLICATE IDENTIFIER" +byte undecl_id[] = "UNDECLARED IDENTIFIER" +byte bad_cnst[] = "BAD CONSTANT" +byte bad_struc[] = "BAD STRUCTURE" +byte bad_offset[] = "BAD STRUCT OFFSET" +byte bad_decl[] = "BAD DECLARATION" +byte bad_op[] = "BAD OPERATION" +byte bad_stmnt[] = "BAD STATMENT" +byte bad_expr[] = "BAD EXPRESSION" +byte bad_syntax[] = "BAD SYNTAX" +byte local_overflw[] = "LOCAL FRAME OVERFLOW" +byte global_sym_overflw[] = "GLOBAL SYMBOL TABLE OVERFLOW" +byte local_sym_overflw[] = "LOCAL SYMBOL TABLE OVERFLOW" +byte ctag_full[] = "CODE LABEL OVERFLOW" +byte no_ctag_offst[] = "CODE OFFSET NOT SUPPORTED" +byte no_close_paren[] = "MISSING CLOSING PAREN" +byte no_close_bracket[] = "MISSING CLOSING BRACKET" +byte missing_op[] = "MISSING OPERAND" +byte no_fin[] = "MISSING FIN" +byte no_loop[] = "MISSING LOOP" +byte no_until[] = "MISSING UNTIL" +byte no_done[] = "MISSING DONE" +byte no_local_init[] = "NO INITIALIZED LOCALS" +// +// ProDOS/SOS file references +// +byte refnum, srcref, incref +byte[32] srcfile +byte[32] incfile +word parsefile // Pointer to current file +word sysincbuf // System I/O buffer for include files +word srcline // Saved source line number +// +// Parser variables +// +byte[128] strconst +byte infunc = 0 +byte stack_loop = 0 +byte prevstmnt = 0 +word retfunc_tag = 0 +word break_tag = 0 +word cont_tag = 0 +predef parse_constexpr(str,val), parse_expr + +//===================================== +// +// PLASMA Compiler +// +//===================================== + +// +// Error handler +// +def parse_err(errstr) + if !parserr + parserr = TRUE + parserrln = lineno - 1 + parserrpos = tknptr - inbuff + puts(parsefile); putc('['); puti(lineno); putc(']'); putc(':'); puts(errstr); putln + puts(instr) + for i = parseerrpos-1 downto 0 + putc(' ') + next + puts("^\n") + fin + return ERR_TKN +end +// +// Include code to reduce size of this file +// +include "toolsrc/codegen.pla" +include "toolsrc/lex.pla" +include "toolsrc/parse.pla" +// +// Look at command line arguments and compile module +// +arg = argNext(argFirst) +if arg + strcpy(@srcfile, arg) + if parsemodule + puts("Bytes compiled: "); puti(codeptr - codebuff); putln + fin +else + puts("Usage: +PLASM [srcfile]\n") +fin + +done