diff --git a/src/cc65/codegen.c b/src/cc65/codegen.c
index a905e7128..a852e19b6 100644
--- a/src/cc65/codegen.c
+++ b/src/cc65/codegen.c
@@ -3486,6 +3486,8 @@ void g_lt (unsigned flags, unsigned long val)
         "tosltax", "tosultax", "toslteax", "tosulteax",
     };
 
+    unsigned Label;
+
     /* If the right hand side is const, the lhs is not on stack but still
      * in the primary register.
      */
@@ -3545,32 +3547,84 @@ void g_lt (unsigned flags, unsigned long val)
         } else if (val == 0) {
 
-            /* Look at the type */
+            /* A signed compare against zero must only look at the sign bit */
             switch (flags & CF_TYPE) {
 
                 case CF_CHAR:
                     if (flags & CF_FORCECHAR) {
-                        AddCodeLine ("tax");
-                        AddCodeLine ("jsr boollt");
+                        AddCodeLine ("asl a");          /* Bit 7 -> carry */
+                        AddCodeLine ("ldx #$00");
+                        AddCodeLine ("lda #$00");
+                        AddCodeLine ("rol a");
                         return;
                     }
                     /* FALLTHROUGH */
 
                 case CF_INT:
                     /* Just check the high byte */
-                    AddCodeLine ("txa");
-                    AddCodeLine ("jsr boollt");
+                    AddCodeLine ("cpx #$80");           /* Bit 7 -> carry */
+                    AddCodeLine ("ldx #$00");
+                    AddCodeLine ("lda #$00");
+                    AddCodeLine ("rol a");
                     return;
 
                 case CF_LONG:
                     /* Just check the high byte */
                     AddCodeLine ("lda sreg+1");
-                    AddCodeLine ("jsr boollt");
+                    AddCodeLine ("asl a");              /* Bit 7 -> carry */
+                    AddCodeLine ("ldx #$00");
+                    AddCodeLine ("lda #$00");
+                    AddCodeLine ("rol a");
                     return;
 
                 default:
                     typeerror (flags);
             }
+
+        } else {
+
+            /* Signed compare against a constant != zero */
+            switch (flags & CF_TYPE) {
+
+                case CF_CHAR:
+                    if (flags & CF_FORCECHAR) {
+                        Label = GetLocalLabel ();
+                        AddCodeLine ("sec");
+                        AddCodeLine ("sbc #$%02X", (unsigned char)val);
+                        AddCodeLine ("bvc %s", LocalLabelName (Label));
+                        AddCodeLine ("eor #$80");
+                        g_defcodelabel (Label);
+                        AddCodeLine ("asl a");          /* Bit 7 -> carry */
+                        AddCodeLine ("ldx #$00");
+                        AddCodeLine ("lda #$00");
+                        AddCodeLine ("rol a");
+                        return;
+                    }
+                    /* FALLTHROUGH */
+
+                case CF_INT:
+                    /* Do a subtraction */
+                    Label = GetLocalLabel ();
+                    AddCodeLine ("cmp #$%02X", (unsigned char)val);
+                    AddCodeLine ("txa");
+                    AddCodeLine ("sbc #$%02X", (unsigned char)(val >> 8));
+                    AddCodeLine ("bvc %s", LocalLabelName (Label));
+                    AddCodeLine ("eor #$80");
+                    g_defcodelabel (Label);
+                    AddCodeLine ("asl a");              /* Bit 7 -> carry */
+                    AddCodeLine ("ldx #$00");
+                    AddCodeLine ("lda #$00");
+                    AddCodeLine ("rol a");
+                    return;
+
+                case CF_LONG:
+                    /* This one is too costly */
+                    break;
+
+                default:
+                    typeerror (flags);
+            }
+
         }
 
         /* If we go here, we didn't emit code. Push the lhs on stack and fall
@@ -3864,26 +3918,27 @@ void g_ge (unsigned flags, unsigned long val)
 
                 case CF_CHAR:
                     if (flags & CF_FORCECHAR) {
+                        /* Do a subtraction. Condition is true if carry set */
                         AddCodeLine ("cmp #$%02X", (unsigned char)val);
-                        AddCodeLine ("jsr booluge");
+                        AddCodeLine ("lda #$00");
+                        AddCodeLine ("ldx #$00");
+                        AddCodeLine ("rol a");
                         return;
                     }
                     /* FALLTHROUGH */
 
                 case CF_INT:
-                    /* If the low byte is zero, we must only test the high byte */
-                    AddCodeLine ("cpx #$%02X", (unsigned char)(val >> 8));
-                    if ((val & 0xFF) != 0) {
-                        unsigned L = GetLocalLabel();
-                        AddCodeLine ("bne %s", LocalLabelName (L));
-                        AddCodeLine ("cmp #$%02X", (unsigned char)val);
-                        g_defcodelabel (L);
-                    }
-                    AddCodeLine ("jsr booluge");
+                    /* Do a subtraction. Condition is true if carry set */
+                    AddCodeLine ("cmp #$%02X", (unsigned char)val);
+                    AddCodeLine ("txa");
+                    AddCodeLine ("sbc #$%02X", (unsigned char)(val >> 8));
+                    AddCodeLine ("lda #$00");
+                    AddCodeLine ("ldx #$00");
+                    AddCodeLine ("rol a");
                     return;
 
                 case CF_LONG:
-                    /* Do a subtraction */
+                    /* Do a subtraction. Condition is true if carry set */
                     AddCodeLine ("cmp #$%02X", (unsigned char)val);
                     AddCodeLine ("txa");
                     AddCodeLine ("sbc #$%02X", (unsigned char)(val >> 8));
@@ -3891,7 +3946,9 @@ void g_ge (unsigned flags, unsigned long val)
                     AddCodeLine ("sbc #$%02X", (unsigned char)(val >> 16));
                     AddCodeLine ("lda sreg+1");
                     AddCodeLine ("sbc #$%02X", (unsigned char)(val >> 24));
-                    AddCodeLine ("jsr booluge");
+                    AddCodeLine ("lda #$00");
+                    AddCodeLine ("ldx #$00");
+                    AddCodeLine ("rol a");
                     return;
 
                 default:
@@ -3900,7 +3957,7 @@
 
         } else if (val == 0) {
 
-            /* Look at the type */
+            /* A signed compare against zero must only look at the sign bit */
             switch (flags & CF_TYPE) {
 
                 case CF_CHAR:
diff --git a/src/cc65/codeopt.c b/src/cc65/codeopt.c
index 3b30c9cd8..8ec1e2275 100644
--- a/src/cc65/codeopt.c
+++ b/src/cc65/codeopt.c
@@ -408,11 +408,11 @@ static unsigned OptShift4 (CodeSeg* S)
          *          ldx     tmp1
          *
          * which makes 4 + 3 * ShiftCount bytes, compared to the original
-         * 3 bytes for the subroutine call. However, in most cases, the
+         * 3 bytes for the subroutine call. However, in most cases, the
          * final load of the X register gets merged with some other insn
          * and replaces a txa, so for a shift count of 1, we get a factor
          * of 200, which matches nicely the CodeSizeFactor enabled with -Oi
-         */
+         */
         if (ShiftCount > 1 || S->CodeSizeFactor > 200) {
             unsigned Size = 4 + 3 * ShiftCount;
             if ((Size * 100 / 3) > S->CodeSizeFactor) {
@@ -1097,7 +1097,8 @@ static OptFunc DOptCmp5         = { OptCmp5,         "OptCmp5",          100, 0,
 static OptFunc DOptCmp6         = { OptCmp6,         "OptCmp6",          100, 0, 0, 0, 0, 0 };
 static OptFunc DOptCmp7         = { OptCmp7,         "OptCmp7",           85, 0, 0, 0, 0, 0 };
 static OptFunc DOptCmp8         = { OptCmp8,         "OptCmp8",           50, 0, 0, 0, 0, 0 };
-static OptFunc DOptCondBranches = { OptCondBranches, "OptCondBranches",   80, 0, 0, 0, 0, 0 };
+static OptFunc DOptCondBranches1= { OptCondBranches1,"OptCondBranches1",  80, 0, 0, 0, 0, 0 };
+static OptFunc DOptCondBranches2= { OptCondBranches2,"OptCondBranches2",   0, 0, 0, 0, 0, 0 };
 static OptFunc DOptDeadCode     = { OptDeadCode,     "OptDeadCode",      100, 0, 0, 0, 0, 0 };
 static OptFunc DOptDeadJumps    = { OptDeadJumps,    "OptDeadJumps",     100, 0, 0, 0, 0, 0 };
 static OptFunc DOptDecouple     = { OptDecouple,     "OptDecouple",      100, 0, 0, 0, 0, 0 };
@@ -1181,7 +1182,8 @@ static OptFunc* OptFuncs[] = {
     &DOptCmp6,
     &DOptCmp7,
     &DOptCmp8,
-    &DOptCondBranches,
+    &DOptCondBranches1,
+    &DOptCondBranches2,
     &DOptDeadCode,
     &DOptDeadJumps,
     &DOptDecouple,
@@ -1558,7 +1560,8 @@ static unsigned RunOptGroup3 (CodeSeg* S)
     C += RunOptFunc (S, &DOptDeadCode, 1);
     C += RunOptFunc (S, &DOptJumpTarget1, 1);
     C += RunOptFunc (S, &DOptJumpTarget2, 1);
-    C += RunOptFunc (S, &DOptCondBranches, 1);
+    C += RunOptFunc (S, &DOptCondBranches1, 1);
+    C += RunOptFunc (S, &DOptCondBranches2, 1);
     C += RunOptFunc (S, &DOptRTSJumps1, 1);
     C += RunOptFunc (S, &DOptBoolTrans, 1);
     C += RunOptFunc (S, &DOptCmp1, 1);
diff --git a/src/cc65/coptind.c b/src/cc65/coptind.c
index 135c39f00..7eba6c598 100644
--- a/src/cc65/coptind.c
+++ b/src/cc65/coptind.c
@@ -706,7 +706,7 @@ NextEntry:
 
 
 
-unsigned OptCondBranches (CodeSeg* S)
+unsigned OptCondBranches1 (CodeSeg* S)
 /* Performs several optimization steps:
  *
  *  - If an immidiate load of a register is followed by a conditional jump that
@@ -799,6 +799,63 @@
 
 
 
+unsigned OptCondBranches2 (CodeSeg* S)
+/* If on entry to a "rol a" instruction the accu is zero, and a beq/bne follows,
+ * we can remove the rol and branch on the state of the carry.
+ */
+{
+    unsigned Changes = 0;
+
+    /* Generate register info for this step */
+    CS_GenRegInfo (S);
+
+    /* Walk over the entries */
+    unsigned I = 0;
+    while (I < CS_GetEntryCount (S)) {
+
+        CodeEntry* N;
+
+        /* Get next entry */
+        CodeEntry* E = CS_GetEntry (S, I);
+
+        /* Check if it's a rol insn with A in accu and a branch follows */
+        if (E->OPC == OP65_ROL                  &&
+            E->AM == AM65_ACC                   &&
+            E->RI->In.RegA == 0                 &&
+            !CE_HasLabel (E)                    &&
+            (N = CS_GetNextEntry (S, I)) != 0   &&
+            (N->Info & OF_ZBRA) != 0            &&
+            !RegAUsed (S, I+1)) {
+
+            /* Replace the branch condition */
+            switch (GetBranchCond (N->OPC)) {
+                case BC_EQ:     CE_ReplaceOPC (N, OP65_JCC);    break;
+                case BC_NE:     CE_ReplaceOPC (N, OP65_JCS);    break;
+                default:        Internal ("Unknown branch condition in OptCondBranches2");
+            }
+
+            /* Delete the rol insn */
+            CS_DelEntry (S, I);
+
+            /* Remember, we had changes */
+            ++Changes;
+
+        }
+
+        /* Next entry */
+        ++I;
+
+    }
+
+    /* Free register info */
+    CS_FreeRegInfo (S);
+
+    /* Return the number of changes made */
+    return Changes;
+}
+
+
+
 /*****************************************************************************/
 /*                     Remove unused loads and stores                        */
 /*****************************************************************************/
@@ -975,7 +1032,7 @@ unsigned OptDupLoads (CodeSeg* S)
                  * remove the store.
                  */
                 if (RegValIsKnown (In->RegA)          &&      /* Value of A is known */
-                    E->AM == AM65_ZP                  &&      /* Store into zp */
+                    E->AM == AM65_ZP                  &&      /* Store into zp */
                     In->RegA == ZPRegVal (E->Chg, In)) {      /* Value identical */
 
                     Delete = 1;
@@ -1028,7 +1085,7 @@ unsigned OptDupLoads (CodeSeg* S)
                  */
                 } else if (RegValIsKnown (In->RegY)) {
                     if (In->RegY == In->RegA) {
-                        CE_ReplaceOPC (E, OP65_STA);
+                        CE_ReplaceOPC (E, OP65_STA);
                     } else if (In->RegY == In->RegX              &&
                                E->AM != AM65_ABSX                &&
                                E->AM != AM65_ZPX) {
@@ -1081,7 +1138,7 @@ unsigned OptDupLoads (CodeSeg* S)
 
             case OP65_TYA:
                 if (RegValIsKnown (In->RegY)            &&
-                    In->RegY == In->RegA                &&
+                    In->RegY == In->RegA                &&
                     (N = CS_GetNextEntry (S, I)) != 0   &&
                     !CE_UseLoadFlags (N)) {
                     /* Value is identical and not followed by a branch */
@@ -1130,10 +1187,10 @@ unsigned OptStoreLoad (CodeSeg* S)
     unsigned I = 0;
     while (I < CS_GetEntryCount (S)) {
 
-        CodeEntry* N;
-        CodeEntry* X;
+        CodeEntry* N;
+        CodeEntry* X;
 
-        /* Get next entry */
+        /* Get next entry */
         CodeEntry* E = CS_GetEntry (S, I);
 
         /* Check if it is a store instruction followed by a load from the
@@ -1887,7 +1944,7 @@ unsigned OptPrecalc (CodeSeg* S)
 /*****************************************************************************/
 
 
-
+
 unsigned OptBranchDist (CodeSeg* S)
 /* Change branches for the distance needed. */
 {
diff --git a/src/cc65/coptind.h b/src/cc65/coptind.h
index 2c7aa9034..eb22301e1 100644
--- a/src/cc65/coptind.h
+++ b/src/cc65/coptind.h
@@ -89,12 +89,17 @@ unsigned OptJumpTarget2 (CodeSeg* S);
  * it's job is already done.
  */
 
-unsigned OptCondBranches (CodeSeg* S);
+unsigned OptCondBranches1 (CodeSeg* S);
 /* If an immidiate load of a register is followed by a conditional jump that
  * is never taken because the load of the register sets the flags in such a
  * manner, remove the conditional branch.
  */
 
+unsigned OptCondBranches2 (CodeSeg* S);
+/* If on entry to a "rol a" instruction the accu is zero, and a beq/bne follows,
+ * we can remove the rol and branch on the state of the carry.
+ */
+
 unsigned OptUnusedLoads (CodeSeg* S);
 /* Remove loads of registers where the value loaded is not used later. */
 
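
Note (not part of the patch): every inlined comparison above ends in the same "ldx #$00 / lda #$00 / rol a" tail, which turns the carry into a 0/1 boolean in A/X; when that boolean only feeds a beq/bne, the new OptCondBranches2 pass replaces the branch with bcc/bcs and drops the rol. As a rough illustration of the signed-compare trick used in g_lt (subtract, fix the sign via bvc/eor #$80, then shift the corrected sign bit into the carry), here is a minimal C model. The function name and the fixed-width types are illustrative assumptions, not code from the compiler:

    #include <stdint.h>

    /* Hypothetical helper: models what the generated 6502 sequence for a
     * signed 16-bit "lhs < rhs" against a constant computes (CF_INT case).
     */
    static unsigned model_signed_lt16 (int16_t lhs, int16_t rhs)
    {
        /* cmp #<rhs / txa / sbc #>rhs: 16-bit subtraction, keep the high byte */
        uint16_t diff = (uint16_t)((uint16_t)lhs - (uint16_t)rhs);
        uint8_t  hi   = (uint8_t)(diff >> 8);

        /* bvc / eor #$80: on signed overflow the sign bit of the result is
         * wrong, so flip it back (the result bit 7 becomes N xor V).
         */
        int32_t wide = (int32_t)lhs - (int32_t)rhs;
        if (wide < -32768L || wide > 32767L) {
            hi ^= 0x80;
        }

        /* asl a / ldx #$00 / lda #$00 / rol a: move the corrected sign bit
         * into the carry and roll it into A, yielding 0 or 1.
         * E.g. model_signed_lt16 (-5, 3) == 1 and model_signed_lt16 (3, -5) == 0.
         */
        return (hi & 0x80) != 0;
    }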