diff --git a/src/main/java/dk/camelot64/kickc/Compiler.java b/src/main/java/dk/camelot64/kickc/Compiler.java index ccbaf5388..1ec80b3d6 100644 --- a/src/main/java/dk/camelot64/kickc/Compiler.java +++ b/src/main/java/dk/camelot64/kickc/Compiler.java @@ -576,8 +576,21 @@ public class Compiler { // Phi mem coalesce removes as many variables introduced by phi lifting as possible - as long as their live ranges do not overlap new Pass3PhiMemCoalesce(program).step(); + + if(getLog().isVerboseSSAOptimize()) { + getLog().append("CONTROL FLOW GRAPH"); + getLog().append(program.getGraph().toString(program)); + } new PassNCullEmptyBlocks(program, false).step(); + if(getLog().isVerboseSSAOptimize()) { + getLog().append("CONTROL FLOW GRAPH"); + getLog().append(program.getGraph().toString(program)); + } new PassNRenumberLabels(program, false).execute(); + if(getLog().isVerboseSSAOptimize()) { + getLog().append("CONTROL FLOW GRAPH"); + getLog().append(program.getGraph().toString(program)); + } new PassNBlockSequencePlanner(program).step(); new Pass3AddNopBeforeCallOns(program).generate(); new PassNStatementIndices(program).execute(); diff --git a/src/main/java/dk/camelot64/kickc/passes/PassNCullEmptyBlocks.java b/src/main/java/dk/camelot64/kickc/passes/PassNCullEmptyBlocks.java index 2e5c46723..e1f0c6038 100644 --- a/src/main/java/dk/camelot64/kickc/passes/PassNCullEmptyBlocks.java +++ b/src/main/java/dk/camelot64/kickc/passes/PassNCullEmptyBlocks.java @@ -60,20 +60,6 @@ public class PassNCullEmptyBlocks extends Pass2SsaOptimization { if(dontCull) continue; - for(ControlFlowBlock predecessor : predecessors) { - Map replace = new LinkedHashMap<>(); - replace.put(removeBlock.getLabel(), successorRef); - if(removeBlock.getLabel().equals(predecessor.getDefaultSuccessor())) { - predecessor.setDefaultSuccessor(successorRef); - } - if(removeBlock.getLabel().equals(predecessor.getConditionalSuccessor())) { - predecessor.setConditionalSuccessor(successorRef); - } - if(removeBlock.getLabel().equals(predecessor.getCallSuccessor())) { - predecessor.setCallSuccessor(successorRef); - } - replaceLabels(predecessor, replace); - } // In all phi functions of a successor blocks make a copy of the phi assignment for each predecessor ControlFlowGraphBaseVisitor phiFixVisitor = new ControlFlowGraphBaseVisitor() { @Override @@ -99,6 +85,21 @@ public class PassNCullEmptyBlocks extends Pass2SsaOptimization { } }; phiFixVisitor.visitBlock(successor); + + for(ControlFlowBlock predecessor : predecessors) { + Map replace = new LinkedHashMap<>(); + replace.put(removeBlock.getLabel(), successorRef); + if(removeBlock.getLabel().equals(predecessor.getDefaultSuccessor())) { + predecessor.setDefaultSuccessor(successorRef); + } + if(removeBlock.getLabel().equals(predecessor.getConditionalSuccessor())) { + predecessor.setConditionalSuccessor(successorRef); + } + if(removeBlock.getLabel().equals(predecessor.getCallSuccessor())) { + predecessor.setCallSuccessor(successorRef); + } + replaceLabels(predecessor, replace); + } getGraph().getAllBlocks().remove(removeBlock); LabelRef removeBlockLabelRef = removeBlock.getLabel(); Label removeBlockLabel = getScope().getLabel(removeBlockLabelRef); diff --git a/src/test/java/dk/camelot64/kickc/test/TestPrograms.java b/src/test/java/dk/camelot64/kickc/test/TestPrograms.java index 232501ce1..26ff0226e 100644 --- a/src/test/java/dk/camelot64/kickc/test/TestPrograms.java +++ b/src/test/java/dk/camelot64/kickc/test/TestPrograms.java @@ -3955,6 +3955,11 @@ public class TestPrograms { compileAndCompare("inline-string-2.c"); } + @Test + public void testLoopProblem3() throws IOException, URISyntaxException { + compileAndCompare("loop-problem3.c"); + } + @Test public void testLoopProblem2() throws IOException, URISyntaxException { compileAndCompare("loop-problem2.c"); diff --git a/src/test/kc/loop-problem3.c b/src/test/kc/loop-problem3.c new file mode 100644 index 000000000..b35696519 --- /dev/null +++ b/src/test/kc/loop-problem3.c @@ -0,0 +1,7 @@ +// A loop that compiles to a wrong sequence - skipping the initilization + +void main() { + for(;;) + for(char* sc = 0x0400;sc<0x0800; sc++) + (*sc)++; +} \ No newline at end of file diff --git a/src/test/ref/loop-problem3.asm b/src/test/ref/loop-problem3.asm new file mode 100644 index 000000000..42630e2ca --- /dev/null +++ b/src/test/ref/loop-problem3.asm @@ -0,0 +1,36 @@ +// A loop that compiles to a wrong sequence - skipping the initilization +.pc = $801 "Basic" +:BasicUpstart(main) +.pc = $80d "Program" +main: { + .label sc = 2 + __b3: + lda #<$400 + sta.z sc + lda #>$400 + sta.z sc+1 + __b1: + // for(char* sc = 0x0400;sc<0x0800; sc++) + lda.z sc+1 + cmp #>$800 + bcc __b2 + bne !+ + lda.z sc + cmp #<$800 + bcc __b2 + !: + jmp __b3 + __b2: + // (*sc)++; + ldy #0 + lda (sc),y + clc + adc #1 + sta (sc),y + // for(char* sc = 0x0400;sc<0x0800; sc++) + inc.z sc + bne !+ + inc.z sc+1 + !: + jmp __b1 +} diff --git a/src/test/ref/loop-problem3.cfg b/src/test/ref/loop-problem3.cfg new file mode 100644 index 000000000..c40bc090e --- /dev/null +++ b/src/test/ref/loop-problem3.cfg @@ -0,0 +1,13 @@ + +void main() +main: scope:[main] from + [0] phi() + to:main::@1 +main::@1: scope:[main] from main main::@1 main::@2 + [1] main::sc#2 = phi( main/(byte*) 1024, main::@2/main::sc#1, main::@1/(byte*) 1024 ) + [2] if(main::sc#2<$800) goto main::@2 + to:main::@1 +main::@2: scope:[main] from main::@1 + [3] *main::sc#2 = ++ *main::sc#2 + [4] main::sc#1 = ++ main::sc#2 + to:main::@1 diff --git a/src/test/ref/loop-problem3.log b/src/test/ref/loop-problem3.log new file mode 100644 index 000000000..79feeb963 --- /dev/null +++ b/src/test/ref/loop-problem3.log @@ -0,0 +1,245 @@ + +CONTROL FLOW GRAPH SSA + +void main() +main: scope:[main] from __start + to:main::@1 +main::@1: scope:[main] from main main::@2 + main::sc#0 = (byte*)$400 + to:main::@2 +main::@2: scope:[main] from main::@1 main::@3 + main::sc#2 = phi( main::@1/main::sc#0, main::@3/main::sc#1 ) + main::$0 = main::sc#2 < $800 + if(main::$0) goto main::@3 + to:main::@1 +main::@3: scope:[main] from main::@2 + main::sc#3 = phi( main::@2/main::sc#2 ) + *main::sc#3 = ++ *main::sc#3 + main::sc#1 = ++ main::sc#3 + to:main::@2 +main::@return: scope:[main] from + return + to:@return + +void __start() +__start: scope:[__start] from + call main + to:__start::@1 +__start::@1: scope:[__start] from __start + to:__start::@return +__start::@return: scope:[__start] from __start::@1 + return + to:@return + +SYMBOL TABLE SSA +void __start() +void main() +bool~ main::$0 +byte* main::sc +byte* main::sc#0 +byte* main::sc#1 +byte* main::sc#2 +byte* main::sc#3 + +Adding number conversion cast (unumber) $800 in main::$0 = main::sc#2 < $800 +Successful SSA optimization PassNAddNumberTypeConversions +Simplifying constant pointer cast (byte*) 1024 +Simplifying constant integer cast $800 +Successful SSA optimization PassNCastSimplification +Finalized unsigned number type $800 +Successful SSA optimization PassNFinalizeNumberTypeConversions +Alias main::sc#2 = main::sc#3 +Successful SSA optimization Pass2AliasElimination +Simple Condition main::$0 [3] if(main::sc#2<$800) goto main::@3 +Successful SSA optimization Pass2ConditionalJumpSimplification +Constant main::sc#0 = (byte*) 1024 +Successful SSA optimization Pass2ConstantIdentification +Removing unused block main::@return +Successful SSA optimization Pass2EliminateUnusedBlocks +Removing unused procedure __start +Removing unused procedure block __start +Removing unused procedure block __start::@1 +Removing unused procedure block __start::@return +Successful SSA optimization PassNEliminateEmptyStart +Inlining constant with var siblings main::sc#0 +Constant inlined main::sc#0 = (byte*) 1024 +Successful SSA optimization Pass2ConstantInlining +Adding NOP phi() at start of main +Adding NOP phi() at start of main::@1 +CALL GRAPH + +Created 1 initial phi equivalence classes +Coalesced [6] main::sc#4 = main::sc#1 +Coalesced down to 1 phi equivalence classes +Culled Empty Block label main::@1 +Renumbering block main::@2 to main::@1 +Renumbering block main::@3 to main::@2 +Adding NOP phi() at start of main + +FINAL CONTROL FLOW GRAPH + +void main() +main: scope:[main] from + [0] phi() + to:main::@1 +main::@1: scope:[main] from main main::@1 main::@2 + [1] main::sc#2 = phi( main/(byte*) 1024, main::@2/main::sc#1, main::@1/(byte*) 1024 ) + [2] if(main::sc#2<$800) goto main::@2 + to:main::@1 +main::@2: scope:[main] from main::@1 + [3] *main::sc#2 = ++ *main::sc#2 + [4] main::sc#1 = ++ main::sc#2 + to:main::@1 + + +VARIABLE REGISTER WEIGHTS +void main() +byte* main::sc +byte* main::sc#1 22.0 +byte* main::sc#2 48.33333333333332 + +Initial phi equivalence classes +[ main::sc#2 main::sc#1 ] +Complete equivalence classes +[ main::sc#2 main::sc#1 ] +Allocated zp[2]:2 [ main::sc#2 main::sc#1 ] +REGISTER UPLIFT POTENTIAL REGISTERS +Statement [2] if(main::sc#2<$800) goto main::@2 [ main::sc#2 ] ( [ main::sc#2 ] { } ) always clobbers reg byte a +Statement [3] *main::sc#2 = ++ *main::sc#2 [ main::sc#2 ] ( [ main::sc#2 ] { } ) always clobbers reg byte a reg byte y +Potential registers zp[2]:2 [ main::sc#2 main::sc#1 ] : zp[2]:2 , + +REGISTER UPLIFT SCOPES +Uplift Scope [main] 70.33: zp[2]:2 [ main::sc#2 main::sc#1 ] +Uplift Scope [] + +Uplifting [main] best 3970 combination zp[2]:2 [ main::sc#2 main::sc#1 ] +Uplifting [] best 3970 combination + +ASSEMBLER BEFORE OPTIMIZATION + // File Comments +// A loop that compiles to a wrong sequence - skipping the initilization + // Upstart +.pc = $801 "Basic" +:BasicUpstart(main) +.pc = $80d "Program" + // Global Constants & labels + // main +main: { + .label sc = 2 + // [1] phi from main main::@1 to main::@1 [phi:main/main::@1->main::@1] + __b1_from_main: + __b1_from___b1: + // [1] phi main::sc#2 = (byte*) 1024 [phi:main/main::@1->main::@1#0] -- pbuz1=pbuc1 + lda #<$400 + sta.z sc + lda #>$400 + sta.z sc+1 + jmp __b1 + // main::@1 + __b1: + // [2] if(main::sc#2<$800) goto main::@2 -- pbuz1_lt_vwuc1_then_la1 + lda.z sc+1 + cmp #>$800 + bcc __b2 + bne !+ + lda.z sc + cmp #<$800 + bcc __b2 + !: + jmp __b1_from___b1 + // main::@2 + __b2: + // [3] *main::sc#2 = ++ *main::sc#2 -- _deref_pbuz1=_inc__deref_pbuz1 + ldy #0 + lda (sc),y + clc + adc #1 + ldy #0 + sta (sc),y + // [4] main::sc#1 = ++ main::sc#2 -- pbuz1=_inc_pbuz1 + inc.z sc + bne !+ + inc.z sc+1 + !: + // [1] phi from main::@2 to main::@1 [phi:main::@2->main::@1] + __b1_from___b2: + // [1] phi main::sc#2 = main::sc#1 [phi:main::@2->main::@1#0] -- register_copy + jmp __b1 +} + // File Data + +ASSEMBLER OPTIMIZATIONS +Removing instruction jmp __b1 +Succesful ASM optimization Pass5NextJumpElimination +Removing instruction ldy #0 +Succesful ASM optimization Pass5UnnecesaryLoadElimination +Removing instruction __b1_from_main: +Succesful ASM optimization Pass5RedundantLabelElimination +Removing instruction __b1_from___b2: +Succesful ASM optimization Pass5UnusedLabelElimination +Relabelling long label __b1_from___b1 to __b3 +Succesful ASM optimization Pass5RelabelLongLabels + +FINAL SYMBOL TABLE +void main() +byte* main::sc +byte* main::sc#1 sc zp[2]:2 22.0 +byte* main::sc#2 sc zp[2]:2 48.33333333333332 + +zp[2]:2 [ main::sc#2 main::sc#1 ] + + +FINAL ASSEMBLER +Score: 3650 + + // File Comments +// A loop that compiles to a wrong sequence - skipping the initilization + // Upstart +.pc = $801 "Basic" +:BasicUpstart(main) +.pc = $80d "Program" + // Global Constants & labels + // main +main: { + .label sc = 2 + // [1] phi from main main::@1 to main::@1 [phi:main/main::@1->main::@1] + __b3: + // [1] phi main::sc#2 = (byte*) 1024 [phi:main/main::@1->main::@1#0] -- pbuz1=pbuc1 + lda #<$400 + sta.z sc + lda #>$400 + sta.z sc+1 + // main::@1 + __b1: + // for(char* sc = 0x0400;sc<0x0800; sc++) + // [2] if(main::sc#2<$800) goto main::@2 -- pbuz1_lt_vwuc1_then_la1 + lda.z sc+1 + cmp #>$800 + bcc __b2 + bne !+ + lda.z sc + cmp #<$800 + bcc __b2 + !: + jmp __b3 + // main::@2 + __b2: + // (*sc)++; + // [3] *main::sc#2 = ++ *main::sc#2 -- _deref_pbuz1=_inc__deref_pbuz1 + ldy #0 + lda (sc),y + clc + adc #1 + sta (sc),y + // for(char* sc = 0x0400;sc<0x0800; sc++) + // [4] main::sc#1 = ++ main::sc#2 -- pbuz1=_inc_pbuz1 + inc.z sc + bne !+ + inc.z sc+1 + !: + // [1] phi from main::@2 to main::@1 [phi:main::@2->main::@1] + // [1] phi main::sc#2 = main::sc#1 [phi:main::@2->main::@1#0] -- register_copy + jmp __b1 +} + // File Data + diff --git a/src/test/ref/loop-problem3.sym b/src/test/ref/loop-problem3.sym new file mode 100644 index 000000000..4b7b40e5e --- /dev/null +++ b/src/test/ref/loop-problem3.sym @@ -0,0 +1,6 @@ +void main() +byte* main::sc +byte* main::sc#1 sc zp[2]:2 22.0 +byte* main::sc#2 sc zp[2]:2 48.33333333333332 + +zp[2]:2 [ main::sc#2 main::sc#1 ] diff --git a/src/test/ref/plus4-keyboard-test.asm b/src/test/ref/plus4-keyboard-test.asm index 20c4be140..4bf5d883a 100644 --- a/src/test/ref/plus4-keyboard-test.asm +++ b/src/test/ref/plus4-keyboard-test.asm @@ -18,18 +18,19 @@ main: { sei // memset(DEFAULT_SCREEN, ' ', 0x0400) jsr memset - __b1: - // for(char y=0;y<8;y++) - lda.z y - cmp #8 - bcc __b2 + __b5: lda #DEFAULT_SCREEN sta.z row+1 lda #0 sta.z y - jmp __b1 + __b1: + // for(char y=0;y<8;y++) + lda.z y + cmp #8 + bcc __b2 + jmp __b5 __b2: // 1<memset] memset_from_main: jsr memset - // [2] phi from main to main::@1 [phi:main->main::@1] + // [2] phi from main main::@1 to main::@1 [phi:main/main::@1->main::@1] __b1_from_main: + __b1_from___b1: + // [2] phi main::row#6 = DEFAULT_SCREEN [phi:main/main::@1->main::@1#0] -- pbuz1=pbuc1 + lda #DEFAULT_SCREEN + sta.z row+1 + // [2] phi main::y#2 = 0 [phi:main/main::@1->main::@1#1] -- vbuz1=vbuc1 + lda #0 + sta.z y jmp __b1 // main::@1 __b1: @@ -582,17 +591,7 @@ main: { lda.z y cmp #8 bcc __b2 - // [2] phi from main::@1 to main::@1 [phi:main::@1->main::@1] - __b1_from___b1: - // [2] phi main::row#6 = DEFAULT_SCREEN [phi:main::@1->main::@1#0] -- pbuz1=pbuc1 - lda #DEFAULT_SCREEN - sta.z row+1 - // [2] phi main::y#2 = 0 [phi:main::@1->main::@1#1] -- vbuz1=vbuc1 - lda #0 - sta.z y - jmp __b1 + jmp __b1_from___b1 // main::@2 __b2: // [4] main::$2 = 1 << main::y#2 -- vbuaa=vbuc1_rol_vbuz1 @@ -734,7 +733,6 @@ Succesful ASM optimization Pass5NextJumpElimination Removing instruction __b1_from_main: Succesful ASM optimization Pass5RedundantLabelElimination Removing instruction memset_from_main: -Removing instruction __b1_from___b1: Removing instruction __b3_from___b2: Removing instruction __b5: Removing instruction __b1_from___b5: @@ -744,6 +742,8 @@ Removing instruction __b1_from_memset: Removing instruction __breturn: Removing instruction __b1_from___b2: Succesful ASM optimization Pass5UnusedLabelElimination +Relabelling long label __b1_from___b1 to __b5 +Succesful ASM optimization Pass5RelabelLongLabels FINAL SYMBOL TABLE const nomodify byte* DEFAULT_SCREEN = (byte*) 3072 @@ -820,7 +820,16 @@ main: { // [1] call memset // [18] phi from main to memset [phi:main->memset] jsr memset - // [2] phi from main to main::@1 [phi:main->main::@1] + // [2] phi from main main::@1 to main::@1 [phi:main/main::@1->main::@1] + __b5: + // [2] phi main::row#6 = DEFAULT_SCREEN [phi:main/main::@1->main::@1#0] -- pbuz1=pbuc1 + lda #DEFAULT_SCREEN + sta.z row+1 + // [2] phi main::y#2 = 0 [phi:main/main::@1->main::@1#1] -- vbuz1=vbuc1 + lda #0 + sta.z y // main::@1 __b1: // for(char y=0;y<8;y++) @@ -828,16 +837,7 @@ main: { lda.z y cmp #8 bcc __b2 - // [2] phi from main::@1 to main::@1 [phi:main::@1->main::@1] - // [2] phi main::row#6 = DEFAULT_SCREEN [phi:main::@1->main::@1#0] -- pbuz1=pbuc1 - lda #DEFAULT_SCREEN - sta.z row+1 - // [2] phi main::y#2 = 0 [phi:main::@1->main::@1#1] -- vbuz1=vbuc1 - lda #0 - sta.z y - jmp __b1 + jmp __b5 // main::@2 __b2: // 1<