1
0
mirror of https://github.com/dschmenk/PLASMA.git synced 2025-03-26 12:30:21 +00:00

Merge pull request #19 from ZornsLemma/optimise3

New optimizations - Thanks Steve!
This commit is contained in:
David Schmenk 2017-07-28 16:36:36 -07:00 committed by GitHub
commit 92cf12e209
4 changed files with 289 additions and 63 deletions

View File

@ -27,8 +27,9 @@ static int idlocal_offset[128];
/*
 * Fixup bookkeeping tables, indexed by fixup number.
 */
static char fixup_size[2048];
static int fixup_type[2048];
static int fixup_tag[2048];
/*
 * Backing storage for op records. There must be exactly one definition of
 * this table; freeop_lst initially points at its first entry.
 */
static t_opseq optbl[2048];
static t_opseq *freeop_lst = &optbl[0];
/*
 * Ops queued by emit_seq() that have not been written out yet;
 * emit_pending_seq() drains this list.
 */
static t_opseq *pending_seq = 0;
#define FIXUP_BYTE 0x00
#define FIXUP_WORD 0x80
int id_match(char *name, int len, char *id)
@ -536,10 +537,12 @@ int emit_data(int vartype, int consttype, long constval, int constsize)
}
/*
 * Write the label line for code tag `tag` ("_Bnnn" followed by the label
 * terminator). Any ops still queued in the pending sequence are flushed
 * first so that they are output ahead of the label.
 */
void emit_codetag(int tag)
{
    emit_pending_seq();
    printf("_B%03d%c\n", tag, LBL);
}
void emit_const(int cval)
{
emit_pending_seq();
if (cval == 0x0000)
printf("\t%s\t$00\t\t\t; ZERO\n", DB);
else if ((cval & 0xFF00) == 0x0000)
@ -652,17 +655,27 @@ void emit_saw(int tag, int offset, int type)
}
/*
 * Emit a DAB opcode ($7C). This is the opcode the peephole pass substitutes
 * for an SAB that is immediately followed by an LAB of the same address.
 *
 * tag, type: identify the target symbol; when `type` is non-zero a word
 *            fixup is allocated and the address is emitted as a separate
 *            "_Fnnn" labelled word (0 for EXTERN_TYPE symbols).
 * offset:    added to the symbol, or, when `type` is zero, used directly as
 *            the 16-bit address and emitted inline as low byte, high byte
 *            with no fixup.
 *
 * The opcode must be emitted exactly once, by one of the two branches below.
 */
void emit_dab(int tag, int offset, int type)
{
    if (type)
    {
        int fixup = fixup_new(tag, type, FIXUP_WORD);
        char *taglbl = tag_string(tag, type);
        printf("\t%s\t$7C\t\t\t; DAB\t%s+%d\n", DB, taglbl, offset);
        printf("_F%03d%c\t%s\t%s+%d\t\t\n", fixup, LBL, DW, type & EXTERN_TYPE ? "0" : taglbl, offset);
    }
    else
        printf("\t%s\t$7C,$%02X,$%02X\t\t; DAB\t%d\n", DB, offset&0xFF,(offset>>8)&0xFF, offset);
}
/*
 * Emit a DAW opcode ($7E). This is the opcode the peephole pass substitutes
 * for an SAW that is immediately followed by an LAW of the same address.
 *
 * tag, type: identify the target symbol; when `type` is non-zero a word
 *            fixup is allocated and the address is emitted as a separate
 *            "_Fnnn" labelled word (0 for EXTERN_TYPE symbols).
 * offset:    added to the symbol, or, when `type` is zero, used directly as
 *            the 16-bit address and emitted inline as low byte, high byte
 *            with no fixup.
 *
 * The opcode must be emitted exactly once, by one of the two branches below.
 */
void emit_daw(int tag, int offset, int type)
{
    if (type)
    {
        int fixup = fixup_new(tag, type, FIXUP_WORD);
        char *taglbl = tag_string(tag, type);
        printf("\t%s\t$7E\t\t\t; DAW\t%s+%d\n", DB, taglbl, offset);
        printf("_F%03d%c\t%s\t%s+%d\t\t\n", fixup, LBL, DW, type & EXTERN_TYPE ? "0" : taglbl, offset);
    }
    else
        printf("\t%s\t$7E,$%02X,$%02X\t\t; DAW\t%d\n", DB, offset&0xFF,(offset>>8)&0xFF, offset);
}
void emit_localaddr(int index)
{
@ -695,6 +708,7 @@ void emit_brtru(int tag)
}
/*
 * Emit a BRNCH opcode ($50) targeting code tag `tag`, followed by a word
 * holding the assembler expression "_Bnnn-*". Pending ops are flushed first
 * so they precede the branch in the output.
 */
void emit_brnch(int tag)
{
    emit_pending_seq();
    printf("\t%s\t$50\t\t\t; BRNCH\t_B%03d\n", DB, tag);
    printf("\t%s\t_B%03d-*\n", DW, tag);
}
@ -705,16 +719,19 @@ void emit_breq(int tag)
}
/*
 * Emit a BRNE opcode ($3E) targeting code tag `tag`, followed by a word
 * holding the assembler expression "_Bnnn-*". Pending ops are flushed first
 * so they precede the branch in the output.
 */
void emit_brne(int tag)
{
    emit_pending_seq();
    printf("\t%s\t$3E\t\t\t; BRNE\t_B%03d\n", DB, tag);
    printf("\t%s\t_B%03d-*\n", DW, tag);
}
/*
 * Emit a BRGT opcode ($38) targeting code tag `tag`, followed by a word
 * holding the assembler expression "_Bnnn-*". Pending ops are flushed first
 * so they precede the branch in the output.
 */
void emit_brgt(int tag)
{
    emit_pending_seq();
    printf("\t%s\t$38\t\t\t; BRGT\t_B%03d\n", DB, tag);
    printf("\t%s\t_B%03d-*\n", DW, tag);
}
/*
 * Emit a BRLT opcode ($3A) targeting code tag `tag`, followed by a word
 * holding the assembler expression "_Bnnn-*". Pending ops are flushed first
 * so they precede the branch in the output.
 */
void emit_brlt(int tag)
{
    emit_pending_seq();
    printf("\t%s\t$3A\t\t\t; BRLT\t_B%03d\n", DB, tag);
    printf("\t%s\t_B%03d-*\n", DW, tag);
}
@ -739,6 +756,7 @@ void emit_ical(void)
}
void emit_leave(void)
{
emit_pending_seq();
if (localsize)
printf("\t%s\t$5A\t\t\t; LEAVE\n", DB);
else
@ -746,6 +764,7 @@ void emit_leave(void)
}
/*
 * Emit a RET opcode ($5C), after flushing any ops still held in the pending
 * sequence.
 */
void emit_ret(void)
{
    emit_pending_seq();
    printf("\t%s\t$5C\t\t\t; RET\n", DB);
}
void emit_enter(int cparams)
@ -769,6 +788,7 @@ void emit_pull_exp(void)
}
/*
 * Emit a DROP opcode ($30), after flushing any ops still held in the pending
 * sequence.
 */
void emit_drop(void)
{
    emit_pending_seq();
    printf("\t%s\t$30\t\t\t; DROP\n", DB);
}
void emit_dup(void)
@ -777,6 +797,7 @@ void emit_dup(void)
}
int emit_unaryop(t_token op)
{
emit_pending_seq();
switch (op)
{
case NEG_TOKEN:
@ -808,6 +829,7 @@ int emit_unaryop(t_token op)
}
int emit_op(t_token op)
{
emit_pending_seq();
switch (op)
{
case MUL_TOKEN:
@ -908,12 +930,69 @@ void release_seq(t_opseq *seq)
freeop_lst = op;
}
}
/*
 * Indicate if an address is (or might be) memory-mapped hardware; used to avoid
 * optimising away accesses to such addresses.
 *
 * addr: the address as held in an op's offset field.
 * Returns non-zero if the address should be treated as hardware.
 */
int is_hardware_address(int addr)
{
    // TODO: I think this is reasonable for Apple hardware but I'm not sure.
    // It's a bit too strong for Acorn hardware but code is unlikely to try to
    // read from high addresses, so there's no real harm in not optimising
    // such accesses.
    //
    // The emitters for untyped addresses write only the low 16 bits of the
    // value, so an address expressed as a negative int (e.g. -16384 for
    // $C000) must be classified by its 16-bit form as well. The unmasked
    // comparison is kept so that anything previously treated as hardware
    // still is.
    return (addr >= 0xC000) || ((addr & 0xFFFF) >= 0xC000);
}
/*
 * Starting from the op after `op`, rewrite each consecutive op that loads
 * exactly the same thing as `op` into a DUP. The op count is unchanged but
 * the byte size shrinks. Scanning stops at the first op that differs.
 *
 * Callers only invoke this on the second optimisation pass, since DUP
 * opcodes may inhibit other peephole optimisations which are more valuable.
 *
 * Returns 1 if at least one op was rewritten, 0 otherwise.
 */
int try_dupify(t_opseq *op)
{
    t_opseq *scan = op->nextop;
    int changed = 0;

    while (scan)
    {
        int same;

        if (scan->code != op->code)
            break;
        // Which operand fields must agree depends on the kind of load.
        switch (op->code)
        {
            case CONST_CODE:
                same = (op->val == scan->val);
                break;
            case LADDR_CODE:
            case LLB_CODE:
            case LLW_CODE:
                same = (op->offsz == scan->offsz);
                break;
            case GADDR_CODE:
            case LAB_CODE:
            case LAW_CODE:
                same = (op->tag == scan->tag) &&
                       (op->offsz == scan->offsz) &&
                       (op->type == scan->type);
                break;
            default:
                // Not a load this routine knows how to duplicate.
                same = 0;
                break;
        }
        if (!same)
            break;
        scan->code = DUP_CODE;
        changed = 1;
        scan = scan->nextop;
    }
    return changed;
}
/*
* Crunch sequence (peephole optimize)
*/
int crunch_seq(t_opseq **seq)
int crunch_seq(t_opseq **seq, int pass)
{
t_opseq *opnext, *opnextnext;
t_opseq *opnext, *opnextnext, *opprev = 0;
t_opseq *op = *seq;
int crunched = 0;
int freeops = 0;
@ -977,15 +1056,7 @@ int crunch_seq(t_opseq **seq)
break;
case BRFALSE_CODE:
if (op->val)
{
opnextnext = opnext->nextop; // Remove never taken branch
if (op == *seq)
*seq = opnextnext;
opnext->nextop = NULL;
release_seq(op);
opnext = opnextnext;
crunched = 1;
}
freeops = -2; // Remove constant and never taken branch
else
{
op->code = BRNCH_CODE; // Always taken branch
@ -995,15 +1066,7 @@ int crunch_seq(t_opseq **seq)
break;
case BRTRUE_CODE:
if (!op->val)
{
opnextnext = opnext->nextop; // Remove never taken branch
if (op == *seq)
*seq = opnextnext;
opnext->nextop = NULL;
release_seq(op);
opnext = opnextnext;
crunched = 1;
}
freeops = -2; // Remove constant never taken branch
else
{
op->code = BRNCH_CODE; // Always taken branch
@ -1011,7 +1074,19 @@ int crunch_seq(t_opseq **seq)
freeops = 1;
}
break;
case CONST_CODE: // Collapse constant operation
case NE_CODE:
if (!op->val)
freeops = -2; // Remove ZERO:ISNE
break;
case EQ_CODE:
if (!op->val)
{
op->code = LOGIC_NOT_CODE;
freeops = 1;
}
break;
case CONST_CODE:
// Collapse constant operation
if ((opnextnext = opnext->nextop))
switch (opnextnext->code)
{
@ -1088,6 +1163,9 @@ int crunch_seq(t_opseq **seq)
freeops = 2;
break;
}
// End of collapse constant operation
if ((pass > 0) && (freeops == 0) && (op->val != 0))
crunched = try_dupify(op);
break; // CONST_CODE
case BINARY_CODE(MUL_TOKEN):
for (shiftcnt = 0; shiftcnt < 16; shiftcnt++)
@ -1148,6 +1226,8 @@ int crunch_seq(t_opseq **seq)
freeops = 1;
break;
}
if ((pass > 0) && (freeops == 0))
crunched = try_dupify(op);
break; // LADDR_CODE
case GADDR_CODE:
switch (opnext->code)
@ -1188,7 +1268,54 @@ int crunch_seq(t_opseq **seq)
freeops = 1;
break;
}
if ((pass > 0) && (freeops == 0))
crunched = try_dupify(op);
break; // GADDR_CODE
case LLB_CODE:
if (pass > 0)
crunched = try_dupify(op);
break; // LLB_CODE
case LLW_CODE:
// LLW [n]:CB 8:SHR -> LLB [n+1]
if ((opnext->code == CONST_CODE) && (opnext->val == 8))
{
if ((opnextnext = opnext->nextop))
{
if (opnextnext->code == SHR_CODE)
{
op->code = LLB_CODE;
op->offsz++;
freeops = 2;
break;
}
}
}
if ((pass > 0) && (freeops == 0))
crunched = try_dupify(op);
break; // LLW_CODE
case LAB_CODE:
if ((pass > 0) && (op->type || !is_hardware_address(op->offsz)))
crunched = try_dupify(op);
break; // LAB_CODE
case LAW_CODE:
// LAW x:CB 8:SHR -> LAB x+1
if ((opnext->code == CONST_CODE) && (opnext->val == 8))
{
if ((opnextnext = opnext->nextop))
{
if (opnextnext->code == SHR_CODE)
{
op->code = LAB_CODE;
op->offsz++;
freeops = 2;
break;
}
}
}
if ((pass > 0) && (freeops == 0) &&
(op->type || !is_hardware_address(op->offsz)))
crunched = try_dupify(op);
break; // LAW_CODE
case LOGIC_NOT_CODE:
switch (opnext->code)
{
@ -1204,10 +1331,64 @@ int crunch_seq(t_opseq **seq)
break;
}
break; // LOGIC_NOT_CODE
case SLB_CODE:
if ((opnext->code == LLB_CODE) && (op->offsz == opnext->offsz))
{
op->code = DLB_CODE;
freeops = 1;
}
break; // SLB_CODE
case SLW_CODE:
if ((opnext->code == LLW_CODE) && (op->offsz == opnext->offsz))
{
op->code = DLW_CODE;
freeops = 1;
}
break; // SLW_CODE
case SAB_CODE:
if ((opnext->code == LAB_CODE) && (op->tag == opnext->tag) &&
(op->offsz == opnext->offsz) && (op->type == opnext->type))
{
op->code = DAB_CODE;
freeops = 1;
}
break; // SAB_CODE
case SAW_CODE:
if ((opnext->code == LAW_CODE) && (op->tag == opnext->tag) &&
(op->offsz == opnext->offsz) && (op->type == opnext->type))
{
op->code = DAW_CODE;
freeops = 1;
}
break; // SAW_CODE
}
//
// Free up crunched ops
//
// Free up crunched ops. If freeops is positive we free up that many ops
// *after* op; if it's negative, we free up abs(freeops) ops *starting
// with* op.
if (freeops < 0)
{
freeops = -freeops;
// If op is at the start of the sequence, we treat this as a special
// case.
if (op == *seq)
{
for (; freeops > 0; --freeops)
{
opnext = op->nextop;
release_op(op);
op = *seq = opnext;
}
crunched = 1;
}
// Otherwise we just move op back to point to the previous op and
// let the following loop remove the required number of ops.
else
{
op = opprev;
opnext = op->nextop;
}
}
while (freeops)
{
op->nextop = opnext->nextop;
@ -1217,6 +1398,7 @@ int crunch_seq(t_opseq **seq)
crunched = 1;
freeops--;
}
opprev = op;
op = opnext;
}
return (crunched);
@ -1261,18 +1443,57 @@ t_opseq *cat_seq(t_opseq *seq1, t_opseq *seq2)
return (seq1);
}
/*
 * Emit a sequence of ops (into the pending sequence)
 *
 * The ops in `seq` are appended to the pending sequence. They are written
 * out immediately unless output is being combined for optimization, in
 * which case they stay queued until emit_pending_seq() is next called.
 *
 * Returns the number of ops in `seq` when they are left queued, otherwise
 * whatever emit_pending_seq() returns.
 */
int emit_seq(t_opseq *seq)
{
    t_opseq *cur = seq;
    int count = 0;
    int has_string = 0;

    // Count the ops and note whether any of them is a CS (string) op.
    while (cur)
    {
        if (cur->code == STR_CODE)
            has_string = 1;
        count++;
        cur = cur->nextop;
    }
    pending_seq = cat_seq(pending_seq, seq);
    // The source code comments in the output are much more logical if we don't
    // merge multiple sequences together. There's no value in doing this merging
    // if we're not optimizing, and we optionally allow it to be prevented even
    // when we are optimizing by specifying the -N (NO_COMBINE) flag.
    //
    // We must also force output if the sequence includes a CS opcode, as the
    // associated 'constant' is only temporarily valid.
    if ((outflags & OPTIMIZE) && !(outflags & NO_COMBINE) && !has_string)
        return (count);
    return emit_pending_seq();
}
/*
* Emit the pending sequence
*/
int emit_pending_seq()
{
// This is called by some of the emit_*() functions to ensure that any
// pending ops are emitted before they emit their own op when they are
// called from the parser. However, this function itself calls some of those
// emit_*() functions to emit instructions from the pending sequence, which
// would cause an infinite loop if we weren't careful. We therefore set
// pending_seq to null on entry and work with a local copy, so if this
// function calls back into itself it is a no-op.
if (!pending_seq)
return 0;
t_opseq *local_pending_seq = pending_seq;
pending_seq = 0;
t_opseq *op;
int emitted = 0;
if (outflags & OPTIMIZE)
while (crunch_seq(&seq));
while (seq)
for (int pass = 0; pass < 2; pass++)
while (crunch_seq(&local_pending_seq, pass));
while (local_pending_seq)
{
op = seq;
op = local_pending_seq;
switch (op->code)
{
case NEG_CODE:
@ -1402,7 +1623,7 @@ int emit_seq(t_opseq *seq)
return (0);
}
emitted++;
seq = seq->nextop;
local_pending_seq = local_pending_seq->nextop;
/*
* Free this op
*/

View File

@ -8,31 +8,31 @@ typedef struct _opseq {
} t_opseq;
/*
 * Op codes for operator tokens: unary operators are the token value with
 * 0x0100 or'ed in, binary operators with 0x0200.
 *
 * Every named code is built from UNARY_CODE()/BINARY_CODE() so that its
 * expansion is fully parenthesized. An unparenthesized "0x0200|TOKEN" would
 * mis-parse in comparisons, because == and != bind tighter than |.
 */
#define UNARY_CODE(tkn) ((tkn)|0x0100)
#define BINARY_CODE(tkn) ((tkn)|0x0200)
#define NEG_CODE UNARY_CODE(NEG_TOKEN)
#define COMP_CODE UNARY_CODE(COMP_TOKEN)
#define LOGIC_NOT_CODE UNARY_CODE(LOGIC_NOT_TOKEN)
#define INC_CODE UNARY_CODE(INC_TOKEN)
#define DEC_CODE UNARY_CODE(DEC_TOKEN)
#define BPTR_CODE UNARY_CODE(BPTR_TOKEN)
#define WPTR_CODE UNARY_CODE(WPTR_TOKEN)
#define MUL_CODE BINARY_CODE(MUL_TOKEN)
#define DIV_CODE BINARY_CODE(DIV_TOKEN)
#define MOD_CODE BINARY_CODE(MOD_TOKEN)
#define ADD_CODE BINARY_CODE(ADD_TOKEN)
#define SUB_CODE BINARY_CODE(SUB_TOKEN)
#define SHL_CODE BINARY_CODE(SHL_TOKEN)
#define SHR_CODE BINARY_CODE(SHR_TOKEN)
#define AND_CODE BINARY_CODE(AND_TOKEN)
#define OR_CODE BINARY_CODE(OR_TOKEN)
#define EOR_CODE BINARY_CODE(EOR_TOKEN)
#define EQ_CODE BINARY_CODE(EQ_TOKEN)
#define NE_CODE BINARY_CODE(NE_TOKEN)
#define GE_CODE BINARY_CODE(GE_TOKEN)
#define LT_CODE BINARY_CODE(LT_TOKEN)
#define GT_CODE BINARY_CODE(GT_TOKEN)
#define LE_CODE BINARY_CODE(LE_TOKEN)
#define LOGIC_OR_CODE BINARY_CODE(LOGIC_OR_TOKEN)
#define LOGIC_AND_CODE BINARY_CODE(LOGIC_AND_TOKEN)
#define CONST_CODE 0x0300
#define STR_CODE 0x0301
#define LB_CODE 0x0302
@ -142,7 +142,8 @@ void emit_start(void);
void emit_rld(void);
void emit_esd(void);
/*
 * Op sequence interface.
 */
void release_seq(t_opseq *seq);
/* Peephole-optimize *seq; `pass` is the optimisation pass number (0 or 1). */
int crunch_seq(t_opseq **seq, int pass);
t_opseq *gen_seq(t_opseq *seq, int opcode, long cval, int tag, int offsz, int type);
t_opseq *cat_seq(t_opseq *seq1, t_opseq *seq2);
/* Queue seq on the pending sequence (may flush it immediately). */
int emit_seq(t_opseq *seq);
/* Write out and clear the pending sequence. */
int emit_pending_seq(void);

View File

@ -24,6 +24,9 @@ int main(int argc, char **argv)
case 'O':
outflags |= OPTIMIZE;
break;
case 'N':
outflags |= NO_COMBINE;
break;
case 'W':
outflags |= WARNINGS;
}

View File

@ -8,6 +8,7 @@
#define INIT (1<<4)
#define SYSFLAGS (1<<5)
// Set by the 'W' command-line option.
#define WARNINGS (1<<6)
// Set by the 'N' command-line option: when optimizing, emit each op sequence
// as soon as it is passed to emit_seq() instead of merging it with later ones.
#define NO_COMBINE (1<<7)
// Bitmask built from the flag values above.
extern int outflags;
#include "tokens.h"
#include "lex.h"