diff --git a/src/toolsrc/codegen.c b/src/toolsrc/codegen.c index 942a4e3..0949de2 100755 --- a/src/toolsrc/codegen.c +++ b/src/toolsrc/codegen.c @@ -27,8 +27,9 @@ static int idlocal_offset[128]; static char fixup_size[2048]; static int fixup_type[2048]; static int fixup_tag[2048]; -static t_opseq optbl[256]; +static t_opseq optbl[2048]; static t_opseq *freeop_lst = &optbl[0]; +static t_opseq *pending_seq = 0; #define FIXUP_BYTE 0x00 #define FIXUP_WORD 0x80 int id_match(char *name, int len, char *id) @@ -536,10 +537,12 @@ int emit_data(int vartype, int consttype, long constval, int constsize) } void emit_codetag(int tag) { + emit_pending_seq(); printf("_B%03d%c\n", tag, LBL); } void emit_const(int cval) { + emit_pending_seq(); if (cval == 0x0000) printf("\t%s\t$00\t\t\t; ZERO\n", DB); else if ((cval & 0xFF00) == 0x0000) @@ -652,17 +655,27 @@ void emit_saw(int tag, int offset, int type) } void emit_dab(int tag, int offset, int type) { - int fixup = fixup_new(tag, type, FIXUP_WORD); - char *taglbl = tag_string(tag, type); - printf("\t%s\t$7C\t\t\t; DAB\t%s+%d\n", DB, taglbl, offset); - printf("_F%03d%c\t%s\t%s+%d\t\t\n", fixup, LBL, DW, type & EXTERN_TYPE ? "0" : taglbl, offset); + if (type) + { + int fixup = fixup_new(tag, type, FIXUP_WORD); + char *taglbl = tag_string(tag, type); + printf("\t%s\t$7C\t\t\t; DAB\t%s+%d\n", DB, taglbl, offset); + printf("_F%03d%c\t%s\t%s+%d\t\t\n", fixup, LBL, DW, type & EXTERN_TYPE ? "0" : taglbl, offset); + } + else + printf("\t%s\t$7C,$%02X,$%02X\t\t; DAB\t%d\n", DB, offset&0xFF,(offset>>8)&0xFF, offset); } void emit_daw(int tag, int offset, int type) { - int fixup = fixup_new(tag, type, FIXUP_WORD); - char *taglbl = tag_string(tag, type); - printf("\t%s\t$7E\t\t\t; DAW\t%s+%d\n", DB, taglbl, offset); - printf("_F%03d%c\t%s\t%s+%d\t\t\n", fixup, LBL, DW, type & EXTERN_TYPE ? 
"0" : taglbl, offset); + if (type) + { + int fixup = fixup_new(tag, type, FIXUP_WORD); + char *taglbl = tag_string(tag, type); + printf("\t%s\t$7E\t\t\t; DAW\t%s+%d\n", DB, taglbl, offset); + printf("_F%03d%c\t%s\t%s+%d\t\t\n", fixup, LBL, DW, type & EXTERN_TYPE ? "0" : taglbl, offset); + } + else + printf("\t%s\t$7E,$%02X,$%02X\t\t; DAW\t%d\n", DB, offset&0xFF,(offset>>8)&0xFF, offset); } void emit_localaddr(int index) { @@ -695,6 +708,7 @@ void emit_brtru(int tag) } void emit_brnch(int tag) { + emit_pending_seq(); printf("\t%s\t$50\t\t\t; BRNCH\t_B%03d\n", DB, tag); printf("\t%s\t_B%03d-*\n", DW, tag); } @@ -705,16 +719,19 @@ void emit_breq(int tag) } void emit_brne(int tag) { + emit_pending_seq(); printf("\t%s\t$3E\t\t\t; BRNE\t_B%03d\n", DB, tag); printf("\t%s\t_B%03d-*\n", DW, tag); } void emit_brgt(int tag) { + emit_pending_seq(); printf("\t%s\t$38\t\t\t; BRGT\t_B%03d\n", DB, tag); printf("\t%s\t_B%03d-*\n", DW, tag); } void emit_brlt(int tag) { + emit_pending_seq(); printf("\t%s\t$3A\t\t\t; BRLT\t_B%03d\n", DB, tag); printf("\t%s\t_B%03d-*\n", DW, tag); } @@ -739,6 +756,7 @@ void emit_ical(void) } void emit_leave(void) { + emit_pending_seq(); if (localsize) printf("\t%s\t$5A\t\t\t; LEAVE\n", DB); else @@ -746,6 +764,7 @@ void emit_leave(void) } void emit_ret(void) { + emit_pending_seq(); printf("\t%s\t$5C\t\t\t; RET\n", DB); } void emit_enter(int cparams) @@ -769,6 +788,7 @@ void emit_pull_exp(void) } void emit_drop(void) { + emit_pending_seq(); printf("\t%s\t$30\t\t\t; DROP\n", DB); } void emit_dup(void) @@ -777,6 +797,7 @@ void emit_dup(void) } int emit_unaryop(t_token op) { + emit_pending_seq(); switch (op) { case NEG_TOKEN: @@ -808,6 +829,7 @@ int emit_unaryop(t_token op) } int emit_op(t_token op) { + emit_pending_seq(); switch (op) { case MUL_TOKEN: @@ -908,12 +930,69 @@ void release_seq(t_opseq *seq) freeop_lst = op; } } +/* + * Indicate if an address is (or might be) memory-mapped hardware; used to avoid + * optimising away accesses to such addresses. 
+ */ +int is_hardware_address(int addr) +{ + // TODO: I think this is reasonable for Apple hardware but I'm not sure. + // It's a bit too strong for Acorn hardware but code is unlikely to try to + // read from high addresses anyway, so there's no real harm in not + // optimising such accesses anyway. + return addr >= 0xC000; +} +/* + * Replace all but the first of a series of identical load opcodes by DUP. This + * doesn't reduce the number of opcodes but does reduce their size in bytes. + * This is only called on the second optimisation pass because the DUP opcodes + * may inhibit other peephole optimisations which are more valuable. + */ +int try_dupify(t_opseq *op) +{ + int crunched = 0; + t_opseq *opn = op->nextop; + for (; opn; opn = opn->nextop) + { + if (op->code != opn->code) + return crunched; + + switch (op->code) + { + case CONST_CODE: + if (op->val != opn->val) + return crunched; + break; + case LADDR_CODE: + case LLB_CODE: + case LLW_CODE: + if (op->offsz != opn->offsz) + return crunched; + break; + case GADDR_CODE: + case LAB_CODE: + case LAW_CODE: + if ((op->tag != opn->tag) || (op->offsz != opn->offsz) || + (op->type != opn->type)) + return crunched; + break; + + default: + return crunched; + } + + opn->code = DUP_CODE; + crunched = 1; + } + + return crunched; +} /* * Crunch sequence (peephole optimize) */ -int crunch_seq(t_opseq **seq) +int crunch_seq(t_opseq **seq, int pass) { - t_opseq *opnext, *opnextnext; + t_opseq *opnext, *opnextnext, *opprev = 0; t_opseq *op = *seq; int crunched = 0; int freeops = 0; @@ -977,15 +1056,7 @@ int crunch_seq(t_opseq **seq) break; case BRFALSE_CODE: if (op->val) - { - opnextnext = opnext->nextop; // Remove never taken branch - if (op == *seq) - *seq = opnextnext; - opnext->nextop = NULL; - release_seq(op); - opnext = opnextnext; - crunched = 1; - } + freeops = -2; // Remove constant and never taken branch else { op->code = BRNCH_CODE; // Always taken branch @@ -995,15 +1066,7 @@ int crunch_seq(t_opseq **seq) 
break; case BRTRUE_CODE: if (!op->val) - { - opnextnext = opnext->nextop; // Remove never taken branch - if (op == *seq) - *seq = opnextnext; - opnext->nextop = NULL; - release_seq(op); - opnext = opnextnext; - crunched = 1; - } + freeops = -2; // Remove constant never taken branch else { op->code = BRNCH_CODE; // Always taken branch @@ -1011,7 +1074,19 @@ int crunch_seq(t_opseq **seq) freeops = 1; } break; - case CONST_CODE: // Collapse constant operation + case NE_CODE: + if (!op->val) + freeops = -2; // Remove ZERO:ISNE + break; + case EQ_CODE: + if (!op->val) + { + op->code = LOGIC_NOT_CODE; + freeops = 1; + } + break; + case CONST_CODE: + // Collapse constant operation if ((opnextnext = opnext->nextop)) switch (opnextnext->code) { @@ -1088,6 +1163,9 @@ int crunch_seq(t_opseq **seq) freeops = 2; break; } + // End of collapse constant operation + if ((pass > 0) && (freeops == 0) && (op->val != 0)) + crunched |= try_dupify(op); break; // CONST_CODE case BINARY_CODE(MUL_TOKEN): for (shiftcnt = 0; shiftcnt < 16; shiftcnt++) @@ -1148,6 +1226,8 @@ int crunch_seq(t_opseq **seq) freeops = 1; break; } + if ((pass > 0) && (freeops == 0)) + crunched |= try_dupify(op); break; // LADDR_CODE case GADDR_CODE: switch (opnext->code) @@ -1188,7 +1268,54 @@ int crunch_seq(t_opseq **seq) freeops = 1; break; } + if ((pass > 0) && (freeops == 0)) + crunched |= try_dupify(op); break; // GADDR_CODE + case LLB_CODE: + if (pass > 0) + crunched |= try_dupify(op); + break; // LLB_CODE + case LLW_CODE: + // LLW [n]:CB 8:SHR -> LLB [n+1] + if ((opnext->code == CONST_CODE) && (opnext->val == 8)) + { + if ((opnextnext = opnext->nextop)) + { + if (opnextnext->code == SHR_CODE) + { + op->code = LLB_CODE; + op->offsz++; + freeops = 2; + break; + } + } + } + if ((pass > 0) && (freeops == 0)) + crunched |= try_dupify(op); + break; // LLW_CODE + case LAB_CODE: + if ((pass > 0) && (op->type || !is_hardware_address(op->offsz))) + crunched |= try_dupify(op); + break; // LAB_CODE + case LAW_CODE: + // LAW 
x:CB 8:SHR -> LAB x+1 + if ((opnext->code == CONST_CODE) && (opnext->val == 8)) + { + if ((opnextnext = opnext->nextop)) + { + if (opnextnext->code == SHR_CODE) + { + op->code = LAB_CODE; + op->offsz++; + freeops = 2; + break; + } + } + } + if ((pass > 0) && (freeops == 0) && + (op->type || !is_hardware_address(op->offsz))) + crunched |= try_dupify(op); + break; // LAW_CODE case LOGIC_NOT_CODE: switch (opnext->code) { @@ -1204,10 +1331,64 @@ int crunch_seq(t_opseq **seq) break; } break; // LOGIC_NOT_CODE + case SLB_CODE: + if ((opnext->code == LLB_CODE) && (op->offsz == opnext->offsz)) + { + op->code = DLB_CODE; + freeops = 1; + } + break; // SLB_CODE + case SLW_CODE: + if ((opnext->code == LLW_CODE) && (op->offsz == opnext->offsz)) + { + op->code = DLW_CODE; + freeops = 1; + } + break; // SLW_CODE + case SAB_CODE: + if ((opnext->code == LAB_CODE) && (op->tag == opnext->tag) && + (op->offsz == opnext->offsz) && (op->type == opnext->type)) + { + op->code = DAB_CODE; + freeops = 1; + } + break; // SAB_CODE + case SAW_CODE: + if ((opnext->code == LAW_CODE) && (op->tag == opnext->tag) && + (op->offsz == opnext->offsz) && (op->type == opnext->type)) + { + op->code = DAW_CODE; + freeops = 1; + } + break; // SAW_CODE } // - // Free up crunched ops - // + // Free up crunched ops. If freeops is positive we free up that many ops + // *after* op; if it's negative, we free up abs(freeops) ops *starting + // with* op. + if (freeops < 0) + { + freeops = -freeops; + // If op is at the start of the sequence, we treat this as a special + // case. + if (op == *seq) + { + for (; freeops > 0; --freeops) + { + opnext = op->nextop; + release_op(op); + op = *seq = opnext; + } + crunched = 1; + } + // Otherwise we just move op back to point to the previous op and + // let the following loop remove the required number of ops. 
+ else + { + op = opprev; + opnext = op->nextop; + } + } while (freeops) { op->nextop = opnext->nextop; @@ -1217,6 +1398,7 @@ int crunch_seq(t_opseq **seq) crunched = 1; freeops--; } + opprev = op; op = opnext; } return (crunched); @@ -1261,18 +1443,57 @@ t_opseq *cat_seq(t_opseq *seq1, t_opseq *seq2) return (seq1); } /* - * Emit a sequence of ops + * Emit a sequence of ops (into the pending sequence) */ int emit_seq(t_opseq *seq) { + t_opseq *op; + int emitted = 0; + int string = 0; + for (op = seq; op; op = op->nextop) + { + if (op->code == STR_CODE) + string = 1; + emitted++; + } + pending_seq = cat_seq(pending_seq, seq); + // The source code comments in the output are much more logical if we don't + // merge multiple sequences together. There's no value in doing this merging + // if we're not optimizing, and we optionally allow it to be prevented even + // when we are optimizing by specifying the -N (NO_COMBINE) flag. + // + // We must also force output if the sequence includes a CS opcode, as the + // associated 'constant' is only temporarily valid. + if (!(outflags & OPTIMIZE) || (outflags & NO_COMBINE) || string) + return emit_pending_seq(); + return (emitted); +} +/* + * Emit the pending sequence + */ +int emit_pending_seq() +{ + // This is called by some of the emit_*() functions to ensure that any + // pending ops are emitted before they emit their own op when they are + // called from the parser. However, this function itself calls some of those + // emit_*() functions to emit instructions from the pending sequence, which + // would cause an infinite loop if we weren't careful. We therefore set + // pending_seq to null on entry and work with a local copy, so if this + // function calls back into itself it is a no-op. 
+ if (!pending_seq) + return 0; + t_opseq *local_pending_seq = pending_seq; + pending_seq = 0; + t_opseq *op; int emitted = 0; if (outflags & OPTIMIZE) - while (crunch_seq(&seq)); - while (seq) + for (int pass = 0; pass < 2; pass++) + while (crunch_seq(&local_pending_seq, pass)); + while (local_pending_seq) { - op = seq; + op = local_pending_seq; switch (op->code) { case NEG_CODE: @@ -1402,7 +1623,7 @@ int emit_seq(t_opseq *seq) return (0); } emitted++; - seq = seq->nextop; + local_pending_seq = local_pending_seq->nextop; /* * Free this op */ diff --git a/src/toolsrc/codegen.h b/src/toolsrc/codegen.h index b25f0b4..9cb750b 100755 --- a/src/toolsrc/codegen.h +++ b/src/toolsrc/codegen.h @@ -8,31 +8,31 @@ typedef struct _opseq { } t_opseq; #define UNARY_CODE(tkn) ((tkn)|0x0100) #define BINARY_CODE(tkn) ((tkn)|0x0200) -#define NEG_CODE 0x0100|NEG_TOKEN -#define COMP_CODE 0x0100|COMP_TOKEN -#define LOGIC_NOT_CODE 0x0100|LOGIC_NOT_TOKEN -#define INC_CODE 0x0100|INC_TOKEN -#define DEC_CODE 0x0100|DEC_TOKEN -#define BPTR_CODE 0x0100|BPTR_TOKEN -#define WPTR_CODE 0x0100|WPTR_TOKEN -#define MUL_CODE 0x0200|MUL_TOKEN -#define DIV_CODE 0x0200|DIV_TOKEN -#define MOD_CODE 0x0200|MOD_TOKEN -#define ADD_CODE 0x0200|ADD_TOKEN -#define SUB_CODE 0x0200|SUB_TOKEN -#define SHL_CODE 0x0200|SHL_TOKEN -#define SHR_CODE 0x0200|SHR_TOKEN -#define AND_CODE 0x0200|AND_TOKEN -#define OR_CODE 0x0200|OR_TOKEN -#define EOR_CODE 0x0200|EOR_TOKEN -#define EQ_CODE 0x0200|EQ_TOKEN -#define NE_CODE 0x0200|NE_TOKEN -#define GE_CODE 0x0200|GE_TOKEN -#define LT_CODE 0x0200|LT_TOKEN -#define GT_CODE 0x0200|GT_TOKEN -#define LE_CODE 0x0200|LE_TOKEN -#define LOGIC_OR_CODE 0x0200|LOGIC_OR_TOKEN -#define LOGIC_AND_CODE 0x0200|LOGIC_AND_TOKEN +#define NEG_CODE (0x0100|NEG_TOKEN) +#define COMP_CODE (0x0100|COMP_TOKEN) +#define LOGIC_NOT_CODE (0x0100|LOGIC_NOT_TOKEN) +#define INC_CODE (0x0100|INC_TOKEN) +#define DEC_CODE (0x0100|DEC_TOKEN) +#define BPTR_CODE (0x0100|BPTR_TOKEN) +#define WPTR_CODE 
(0x0100|WPTR_TOKEN) +#define MUL_CODE (0x0200|MUL_TOKEN) +#define DIV_CODE (0x0200|DIV_TOKEN) +#define MOD_CODE (0x0200|MOD_TOKEN) +#define ADD_CODE (0x0200|ADD_TOKEN) +#define SUB_CODE (0x0200|SUB_TOKEN) +#define SHL_CODE (0x0200|SHL_TOKEN) +#define SHR_CODE (0x0200|SHR_TOKEN) +#define AND_CODE (0x0200|AND_TOKEN) +#define OR_CODE (0x0200|OR_TOKEN) +#define EOR_CODE (0x0200|EOR_TOKEN) +#define EQ_CODE (0x0200|EQ_TOKEN) +#define NE_CODE (0x0200|NE_TOKEN) +#define GE_CODE (0x0200|GE_TOKEN) +#define LT_CODE (0x0200|LT_TOKEN) +#define GT_CODE (0x0200|GT_TOKEN) +#define LE_CODE (0x0200|LE_TOKEN) +#define LOGIC_OR_CODE (0x0200|LOGIC_OR_TOKEN) +#define LOGIC_AND_CODE (0x0200|LOGIC_AND_TOKEN) #define CONST_CODE 0x0300 #define STR_CODE 0x0301 #define LB_CODE 0x0302 @@ -142,7 +142,8 @@ void emit_start(void); void emit_rld(void); void emit_esd(void); void release_seq(t_opseq *seq); -int crunch_seq(t_opseq **seq); +int crunch_seq(t_opseq **seq, int pass); t_opseq *gen_seq(t_opseq *seq, int opcode, long cval, int tag, int offsz, int type); t_opseq *cat_seq(t_opseq *seq1, t_opseq *seq2); int emit_seq(t_opseq *seq); +int emit_pending_seq(); diff --git a/src/toolsrc/plasm.c b/src/toolsrc/plasm.c index d111694..b01a4cd 100755 --- a/src/toolsrc/plasm.c +++ b/src/toolsrc/plasm.c @@ -24,6 +24,9 @@ int main(int argc, char **argv) case 'O': outflags |= OPTIMIZE; break; + case 'N': + outflags |= NO_COMBINE; + break; case 'W': outflags |= WARNINGS; } diff --git a/src/toolsrc/plasm.h b/src/toolsrc/plasm.h index f3c4432..c49c669 100755 --- a/src/toolsrc/plasm.h +++ b/src/toolsrc/plasm.h @@ -8,6 +8,7 @@ #define INIT (1<<4) #define SYSFLAGS (1<<5) #define WARNINGS (1<<6) +#define NO_COMBINE (1<<7) extern int outflags; #include "tokens.h" #include "lex.h"