From b97d8300b36a9c43c28e4c8cacd77939937f7efc Mon Sep 17 00:00:00 2001 From: jespergravgaard Date: Sun, 25 Aug 2019 22:57:23 +0200 Subject: [PATCH] Added another example of loophead optimization giving wrong return value. #290 --- .../dk/camelot64/kickc/test/TestPrograms.java | 12 +- src/test/kc/loophead-problem-3.kc | 12 + src/test/ref/loophead-problem-3.asm | 79 ++ src/test/ref/loophead-problem-3.cfg | 49 + src/test/ref/loophead-problem-3.log | 964 ++++++++++++++++++ src/test/ref/loophead-problem-3.sym | 44 + 6 files changed, 1156 insertions(+), 4 deletions(-) create mode 100644 src/test/kc/loophead-problem-3.kc create mode 100644 src/test/ref/loophead-problem-3.asm create mode 100644 src/test/ref/loophead-problem-3.cfg create mode 100644 src/test/ref/loophead-problem-3.log create mode 100644 src/test/ref/loophead-problem-3.sym diff --git a/src/test/java/dk/camelot64/kickc/test/TestPrograms.java b/src/test/java/dk/camelot64/kickc/test/TestPrograms.java index 56170ae45..7236a7983 100644 --- a/src/test/java/dk/camelot64/kickc/test/TestPrograms.java +++ b/src/test/java/dk/camelot64/kickc/test/TestPrograms.java @@ -42,11 +42,10 @@ public class TestPrograms { compileAndCompare("optimize-unsigned-comparisons"); } - - // TODO: Fix loop head problem! https://gitlab.com/camelot/kickc/issues/261 + // TODO: Fix loop head problem! https://gitlab.com/camelot/kickc/issues/290 @Test - public void testLoopheadProblem() throws IOException, URISyntaxException { - compileAndCompare("loophead-problem"); + public void testLoopheadProblem3() throws IOException, URISyntaxException { + compileAndCompare("loophead-problem-3"); } // TODO: Fix loop head problem! https://gitlab.com/camelot/kickc/issues/290 @@ -55,6 +54,11 @@ public class TestPrograms { compileAndCompare("loophead-problem-2"); } + // TODO: Fix loop head problem! 
https://gitlab.com/camelot/kickc/issues/261 + @Test + public void testLoopheadProblem() throws IOException, URISyntaxException { + compileAndCompare("loophead-problem"); + } @Test public void testAsmMnemonicNames() throws IOException, URISyntaxException { diff --git a/src/test/kc/loophead-problem-3.kc b/src/test/kc/loophead-problem-3.kc new file mode 100644 index 000000000..8982bb266 --- /dev/null +++ b/src/test/kc/loophead-problem-3.kc @@ -0,0 +1,12 @@ +// Program where loop-head optimization produces wrong return value +// Reported by Richard-William Loerakker + +import "c64" +import "multiply" + +void main() { + dword result = mul16u(4,123); + word kaputt = <result; + *BORDERCOL = <kaputt; + *BGCOL = >kaputt; +} \ No newline at end of file diff --git a/src/test/ref/loophead-problem-3.asm b/src/test/ref/loophead-problem-3.asm new file mode 100644 index 000000000..83bdf0e8a --- /dev/null +++ b/src/test/ref/loophead-problem-3.asm @@ -0,0 +1,79 @@ +// Program where loop-head optimization produces wrong return value +// Reported by Richard-William Loerakker +.pc = $801 "Basic" +:BasicUpstart(main) +.pc = $80d "Program" + .label BORDERCOL = $d020 + .label BGCOL = $d021 +main: { + .label result = 2 + .label kaputt = $a + jsr mul16u + lda.z result + sta.z kaputt + lda.z result+1 + sta.z kaputt+1 + lda.z kaputt + sta BORDERCOL + lda.z kaputt+1 + sta BGCOL + rts +} +// Perform binary multiplication of two unsigned 16-bit words into a 32-bit unsigned double word +// mul16u(word zeropage($a) a) +mul16u: { + .const b = $7b + .label a = $a + .label mb = 6 + .label res = 2 + .label return = 2 + lda #<b + sta.z mb + lda #>b + sta.z mb+1 + lda #<b>>$10 + sta.z mb+2 + lda #>b>>$10 + sta.z mb+3 + lda #0 + sta.z res + sta.z res+1 + sta.z res+2 + sta.z res+3 + lda #<4 + sta.z a + lda #>4 + sta.z a+1 + b1: + lda.z a + bne b2 + lda.z a+1 + bne b2 + rts + b2: + lda #1 + and.z a + cmp #0 + beq b3 + lda.z res + clc + adc.z mb + sta.z res + lda.z res+1 + adc.z mb+1 + sta.z res+1 + lda.z res+2 + adc.z mb+2 + sta.z res+2 + lda.z res+3 + adc.z mb+3 + sta.z
res+3 + b3: + lsr.z a+1 + ror.z a + asl.z mb + rol.z mb+1 + rol.z mb+2 + rol.z mb+3 + jmp b1 +} diff --git a/src/test/ref/loophead-problem-3.cfg b/src/test/ref/loophead-problem-3.cfg new file mode 100644 index 000000000..a9f0718d8 --- /dev/null +++ b/src/test/ref/loophead-problem-3.cfg @@ -0,0 +1,49 @@ +@begin: scope:[] from + [0] phi() + to:@1 +@1: scope:[] from @begin + [1] phi() + [2] call main + to:@end +@end: scope:[] from @1 + [3] phi() +main: scope:[main] from @1 + [4] phi() + [5] call mul16u + [6] (dword) mul16u::return#2 ← (dword) mul16u::res#2 + to:main::@1 +main::@1: scope:[main] from main + [7] (dword) main::result#0 ← (dword) mul16u::return#2 + [8] (word) main::kaputt#0 ← < (dword) main::result#0 + [9] (byte~) main::$2 ← < (word) main::kaputt#0 + [10] *((const byte*) BORDERCOL#0) ← (byte~) main::$2 + [11] (byte~) main::$3 ← > (word) main::kaputt#0 + [12] *((const byte*) BGCOL#0) ← (byte~) main::$3 + to:main::@return +main::@return: scope:[main] from main::@1 + [13] return + to:@return +mul16u: scope:[mul16u] from main + [14] phi() + to:mul16u::@1 +mul16u::@1: scope:[mul16u] from mul16u mul16u::@3 + [15] (dword) mul16u::mb#2 ← phi( mul16u/(const word) mul16u::b#0 mul16u::@3/(dword) mul16u::mb#1 ) + [15] (dword) mul16u::res#2 ← phi( mul16u/(byte) 0 mul16u::@3/(dword) mul16u::res#6 ) + [15] (word) mul16u::a#2 ← phi( mul16u/(byte) 4 mul16u::@3/(word) mul16u::a#0 ) + [16] if((word) mul16u::a#2!=(byte) 0) goto mul16u::@2 + to:mul16u::@return +mul16u::@return: scope:[mul16u] from mul16u::@1 + [17] return + to:@return +mul16u::@2: scope:[mul16u] from mul16u::@1 + [18] (byte~) mul16u::$1 ← (word) mul16u::a#2 & (byte) 1 + [19] if((byte~) mul16u::$1==(byte) 0) goto mul16u::@3 + to:mul16u::@4 +mul16u::@4: scope:[mul16u] from mul16u::@2 + [20] (dword) mul16u::res#1 ← (dword) mul16u::res#2 + (dword) mul16u::mb#2 + to:mul16u::@3 +mul16u::@3: scope:[mul16u] from mul16u::@2 mul16u::@4 + [21] (dword) mul16u::res#6 ← phi( mul16u::@2/(dword) mul16u::res#2 
mul16u::@4/(dword) mul16u::res#1 ) + [22] (word) mul16u::a#0 ← (word) mul16u::a#2 >> (byte) 1 + [23] (dword) mul16u::mb#1 ← (dword) mul16u::mb#2 << (byte) 1 + to:mul16u::@1 diff --git a/src/test/ref/loophead-problem-3.log b/src/test/ref/loophead-problem-3.log new file mode 100644 index 000000000..60481baa4 --- /dev/null +++ b/src/test/ref/loophead-problem-3.log @@ -0,0 +1,964 @@ +Inlined call (byte~) vicSelectGfxBank::$0 ← call toDd00 (byte*) vicSelectGfxBank::gfx +Culled Empty Block (label) @1 +Culled Empty Block (label) @2 +Culled Empty Block (label) @3 +Culled Empty Block (label) @4 +Culled Empty Block (label) @5 +Culled Empty Block (label) @6 +Culled Empty Block (label) @7 +Culled Empty Block (label) mul16u::@5 +Culled Empty Block (label) mul16u::@6 +Culled Empty Block (label) mul16u::@8 +Culled Empty Block (label) mul16u::@9 +Culled Empty Block (label) @8 +Culled Empty Block (label) @9 + +CONTROL FLOW GRAPH SSA +@begin: scope:[] from + (byte*) BORDERCOL#0 ← ((byte*)) (number) $d020 + (byte*) BGCOL#0 ← ((byte*)) (number) $d021 + to:@10 +mul16u: scope:[mul16u] from main + (word) mul16u::a#5 ← phi( main/(word) mul16u::a#1 ) + (word) mul16u::b#1 ← phi( main/(word) mul16u::b#0 ) + (dword) mul16u::res#0 ← (number) 0 + (dword) mul16u::mb#0 ← (word) mul16u::b#1 + to:mul16u::@1 +mul16u::@1: scope:[mul16u] from mul16u mul16u::@4 + (dword) mul16u::mb#5 ← phi( mul16u/(dword) mul16u::mb#0 mul16u::@4/(dword) mul16u::mb#1 ) + (dword) mul16u::res#4 ← phi( mul16u/(dword) mul16u::res#0 mul16u::@4/(dword) mul16u::res#6 ) + (word) mul16u::a#2 ← phi( mul16u/(word) mul16u::a#5 mul16u::@4/(word) mul16u::a#0 ) + (bool~) mul16u::$0 ← (word) mul16u::a#2 != (number) 0 + if((bool~) mul16u::$0) goto mul16u::@2 + to:mul16u::@3 +mul16u::@2: scope:[mul16u] from mul16u::@1 + (dword) mul16u::res#5 ← phi( mul16u::@1/(dword) mul16u::res#4 ) + (dword) mul16u::mb#4 ← phi( mul16u::@1/(dword) mul16u::mb#5 ) + (word) mul16u::a#3 ← phi( mul16u::@1/(word) mul16u::a#2 ) + (number~) mul16u::$1 ← (word) 
mul16u::a#3 & (number) 1 + (bool~) mul16u::$2 ← (number~) mul16u::$1 != (number) 0 + (bool~) mul16u::$3 ← ! (bool~) mul16u::$2 + if((bool~) mul16u::$3) goto mul16u::@4 + to:mul16u::@7 +mul16u::@3: scope:[mul16u] from mul16u::@1 + (dword) mul16u::res#2 ← phi( mul16u::@1/(dword) mul16u::res#4 ) + (dword) mul16u::return#0 ← (dword) mul16u::res#2 + to:mul16u::@return +mul16u::@4: scope:[mul16u] from mul16u::@2 mul16u::@7 + (dword) mul16u::res#6 ← phi( mul16u::@2/(dword) mul16u::res#5 mul16u::@7/(dword) mul16u::res#1 ) + (dword) mul16u::mb#2 ← phi( mul16u::@2/(dword) mul16u::mb#4 mul16u::@7/(dword) mul16u::mb#3 ) + (word) mul16u::a#4 ← phi( mul16u::@2/(word) mul16u::a#3 mul16u::@7/(word) mul16u::a#6 ) + (word~) mul16u::$5 ← (word) mul16u::a#4 >> (number) 1 + (word) mul16u::a#0 ← (word~) mul16u::$5 + (dword~) mul16u::$6 ← (dword) mul16u::mb#2 << (number) 1 + (dword) mul16u::mb#1 ← (dword~) mul16u::$6 + to:mul16u::@1 +mul16u::@7: scope:[mul16u] from mul16u::@2 + (word) mul16u::a#6 ← phi( mul16u::@2/(word) mul16u::a#3 ) + (dword) mul16u::mb#3 ← phi( mul16u::@2/(dword) mul16u::mb#4 ) + (dword) mul16u::res#3 ← phi( mul16u::@2/(dword) mul16u::res#5 ) + (dword~) mul16u::$4 ← (dword) mul16u::res#3 + (dword) mul16u::mb#3 + (dword) mul16u::res#1 ← (dword~) mul16u::$4 + to:mul16u::@4 +mul16u::@return: scope:[mul16u] from mul16u::@3 + (dword) mul16u::return#3 ← phi( mul16u::@3/(dword) mul16u::return#0 ) + (dword) mul16u::return#1 ← (dword) mul16u::return#3 + return + to:@return +main: scope:[main] from @10 + (word) mul16u::a#1 ← (number) 4 + (word) mul16u::b#0 ← (number) $7b + call mul16u + (dword) mul16u::return#2 ← (dword) mul16u::return#1 + to:main::@1 +main::@1: scope:[main] from main + (dword) mul16u::return#4 ← phi( main/(dword) mul16u::return#2 ) + (dword~) main::$0 ← (dword) mul16u::return#4 + (dword) main::result#0 ← (dword~) main::$0 + (word~) main::$1 ← < (dword) main::result#0 + (word) main::kaputt#0 ← (word~) main::$1 + (byte~) main::$2 ← < (word) main::kaputt#0 + 
*((byte*) BORDERCOL#0) ← (byte~) main::$2 + (byte~) main::$3 ← > (word) main::kaputt#0 + *((byte*) BGCOL#0) ← (byte~) main::$3 + to:main::@return +main::@return: scope:[main] from main::@1 + return + to:@return +@10: scope:[] from @begin + call main + to:@11 +@11: scope:[] from @10 + to:@end +@end: scope:[] from @11 + +SYMBOL TABLE SSA +(label) @10 +(label) @11 +(label) @begin +(label) @end +(byte*) BGCOL +(byte*) BGCOL#0 +(byte*) BORDERCOL +(byte*) BORDERCOL#0 +(void()) main() +(dword~) main::$0 +(word~) main::$1 +(byte~) main::$2 +(byte~) main::$3 +(label) main::@1 +(label) main::@return +(word) main::kaputt +(word) main::kaputt#0 +(dword) main::result +(dword) main::result#0 +(dword()) mul16u((word) mul16u::a , (word) mul16u::b) +(bool~) mul16u::$0 +(number~) mul16u::$1 +(bool~) mul16u::$2 +(bool~) mul16u::$3 +(dword~) mul16u::$4 +(word~) mul16u::$5 +(dword~) mul16u::$6 +(label) mul16u::@1 +(label) mul16u::@2 +(label) mul16u::@3 +(label) mul16u::@4 +(label) mul16u::@7 +(label) mul16u::@return +(word) mul16u::a +(word) mul16u::a#0 +(word) mul16u::a#1 +(word) mul16u::a#2 +(word) mul16u::a#3 +(word) mul16u::a#4 +(word) mul16u::a#5 +(word) mul16u::a#6 +(word) mul16u::b +(word) mul16u::b#0 +(word) mul16u::b#1 +(dword) mul16u::mb +(dword) mul16u::mb#0 +(dword) mul16u::mb#1 +(dword) mul16u::mb#2 +(dword) mul16u::mb#3 +(dword) mul16u::mb#4 +(dword) mul16u::mb#5 +(dword) mul16u::res +(dword) mul16u::res#0 +(dword) mul16u::res#1 +(dword) mul16u::res#2 +(dword) mul16u::res#3 +(dword) mul16u::res#4 +(dword) mul16u::res#5 +(dword) mul16u::res#6 +(dword) mul16u::return +(dword) mul16u::return#0 +(dword) mul16u::return#1 +(dword) mul16u::return#2 +(dword) mul16u::return#3 +(dword) mul16u::return#4 + +Adding number conversion cast (unumber) 0 in (dword) mul16u::res#0 ← (number) 0 +Adding number conversion cast (unumber) 0 in (bool~) mul16u::$0 ← (word) mul16u::a#2 != (number) 0 +Adding number conversion cast (unumber) 1 in (number~) mul16u::$1 ← (word) mul16u::a#3 & (number) 1 
+Adding number conversion cast (unumber) mul16u::$1 in (number~) mul16u::$1 ← (word) mul16u::a#3 & (unumber)(number) 1 +Adding number conversion cast (unumber) 0 in (bool~) mul16u::$2 ← (unumber~) mul16u::$1 != (number) 0 +Adding number conversion cast (unumber) 1 in (word~) mul16u::$5 ← (word) mul16u::a#4 >> (number) 1 +Adding number conversion cast (unumber) 1 in (dword~) mul16u::$6 ← (dword) mul16u::mb#2 << (number) 1 +Adding number conversion cast (unumber) 4 in (word) mul16u::a#1 ← (number) 4 +Adding number conversion cast (unumber) $7b in (word) mul16u::b#0 ← (number) $7b +Successful SSA optimization PassNAddNumberTypeConversions +Inlining cast (byte*) BORDERCOL#0 ← (byte*)(number) $d020 +Inlining cast (byte*) BGCOL#0 ← (byte*)(number) $d021 +Inlining cast (dword) mul16u::res#0 ← (unumber)(number) 0 +Inlining cast (word) mul16u::a#1 ← (unumber)(number) 4 +Inlining cast (word) mul16u::b#0 ← (unumber)(number) $7b +Successful SSA optimization Pass2InlineCast +Simplifying constant pointer cast (byte*) 53280 +Simplifying constant pointer cast (byte*) 53281 +Simplifying constant integer cast 0 +Simplifying constant integer cast 0 +Simplifying constant integer cast 1 +Simplifying constant integer cast 0 +Simplifying constant integer cast 1 +Simplifying constant integer cast 1 +Simplifying constant integer cast 4 +Simplifying constant integer cast $7b +Successful SSA optimization PassNCastSimplification +Finalized unsigned number type (byte) 0 +Finalized unsigned number type (byte) 0 +Finalized unsigned number type (byte) 1 +Finalized unsigned number type (byte) 0 +Finalized unsigned number type (byte) 1 +Finalized unsigned number type (byte) 1 +Finalized unsigned number type (byte) 4 +Finalized unsigned number type (byte) $7b +Successful SSA optimization PassNFinalizeNumberTypeConversions +Inferred type updated to byte in (unumber~) mul16u::$1 ← (word) mul16u::a#3 & (byte) 1 +Inversing boolean not [11] (bool~) mul16u::$3 ← (byte~) mul16u::$1 == (byte) 0 from [10] 
(bool~) mul16u::$2 ← (byte~) mul16u::$1 != (byte) 0 +Successful SSA optimization Pass2UnaryNotSimplification +Alias (dword) mul16u::mb#0 = (word) mul16u::b#1 +Alias (word) mul16u::a#2 = (word) mul16u::a#3 (word) mul16u::a#6 +Alias (dword) mul16u::mb#3 = (dword) mul16u::mb#4 (dword) mul16u::mb#5 +Alias (dword) mul16u::res#2 = (dword) mul16u::res#5 (dword) mul16u::res#4 (dword) mul16u::return#0 (dword) mul16u::res#3 (dword) mul16u::return#3 (dword) mul16u::return#1 +Alias (word) mul16u::a#0 = (word~) mul16u::$5 +Alias (dword) mul16u::mb#1 = (dword~) mul16u::$6 +Alias (dword) mul16u::res#1 = (dword~) mul16u::$4 +Alias (dword) mul16u::return#2 = (dword) mul16u::return#4 +Alias (dword) main::result#0 = (dword~) main::$0 +Alias (word) main::kaputt#0 = (word~) main::$1 +Successful SSA optimization Pass2AliasElimination +Alias (word) mul16u::a#2 = (word) mul16u::a#4 +Alias (dword) mul16u::mb#2 = (dword) mul16u::mb#3 +Successful SSA optimization Pass2AliasElimination +Identical Phi Values (dword) mul16u::mb#0 (word) mul16u::b#0 +Identical Phi Values (word) mul16u::a#5 (word) mul16u::a#1 +Successful SSA optimization Pass2IdenticalPhiElimination +Simple Condition (bool~) mul16u::$0 [7] if((word) mul16u::a#2!=(byte) 0) goto mul16u::@2 +Simple Condition (bool~) mul16u::$3 [12] if((byte~) mul16u::$1==(byte) 0) goto mul16u::@4 +Successful SSA optimization Pass2ConditionalJumpSimplification +Constant (const byte*) BORDERCOL#0 = (byte*) 53280 +Constant (const byte*) BGCOL#0 = (byte*) 53281 +Constant (const dword) mul16u::res#0 = 0 +Constant (const word) mul16u::a#1 = 4 +Constant (const word) mul16u::b#0 = $7b +Successful SSA optimization Pass2ConstantIdentification +Inlining constant with var siblings (const dword) mul16u::res#0 +Inlining constant with var siblings (const word) mul16u::a#1 +Constant inlined mul16u::res#0 = (byte) 0 +Constant inlined mul16u::a#1 = (byte) 4 +Successful SSA optimization Pass2ConstantInlining +Added new block during phi lifting mul16u::@10(between 
mul16u::@2 and mul16u::@4) +Adding NOP phi() at start of @begin +Adding NOP phi() at start of @10 +Adding NOP phi() at start of @11 +Adding NOP phi() at start of @end +Adding NOP phi() at start of main +Adding NOP phi() at start of mul16u +Adding NOP phi() at start of mul16u::@3 +CALL GRAPH +Calls in [] to main:2 +Calls in [main] to mul16u:6 + +Created 4 initial phi equivalence classes +Coalesced [23] mul16u::res#9 ← mul16u::res#1 +Coalesced [27] mul16u::a#7 ← mul16u::a#0 +Coalesced [28] mul16u::res#7 ← mul16u::res#6 +Coalesced [29] mul16u::mb#6 ← mul16u::mb#1 +Coalesced (already) [30] mul16u::res#8 ← mul16u::res#2 +Coalesced down to 3 phi equivalence classes +Culled Empty Block (label) @11 +Culled Empty Block (label) mul16u::@3 +Culled Empty Block (label) mul16u::@10 +Renumbering block @10 to @1 +Renumbering block mul16u::@4 to mul16u::@3 +Renumbering block mul16u::@7 to mul16u::@4 +Adding NOP phi() at start of @begin +Adding NOP phi() at start of @1 +Adding NOP phi() at start of @end +Adding NOP phi() at start of main +Adding NOP phi() at start of mul16u + +FINAL CONTROL FLOW GRAPH +@begin: scope:[] from + [0] phi() + to:@1 +@1: scope:[] from @begin + [1] phi() + [2] call main + to:@end +@end: scope:[] from @1 + [3] phi() +main: scope:[main] from @1 + [4] phi() + [5] call mul16u + [6] (dword) mul16u::return#2 ← (dword) mul16u::res#2 + to:main::@1 +main::@1: scope:[main] from main + [7] (dword) main::result#0 ← (dword) mul16u::return#2 + [8] (word) main::kaputt#0 ← < (dword) main::result#0 + [9] (byte~) main::$2 ← < (word) main::kaputt#0 + [10] *((const byte*) BORDERCOL#0) ← (byte~) main::$2 + [11] (byte~) main::$3 ← > (word) main::kaputt#0 + [12] *((const byte*) BGCOL#0) ← (byte~) main::$3 + to:main::@return +main::@return: scope:[main] from main::@1 + [13] return + to:@return +mul16u: scope:[mul16u] from main + [14] phi() + to:mul16u::@1 +mul16u::@1: scope:[mul16u] from mul16u mul16u::@3 + [15] (dword) mul16u::mb#2 ← phi( mul16u/(const word) mul16u::b#0 
mul16u::@3/(dword) mul16u::mb#1 ) + [15] (dword) mul16u::res#2 ← phi( mul16u/(byte) 0 mul16u::@3/(dword) mul16u::res#6 ) + [15] (word) mul16u::a#2 ← phi( mul16u/(byte) 4 mul16u::@3/(word) mul16u::a#0 ) + [16] if((word) mul16u::a#2!=(byte) 0) goto mul16u::@2 + to:mul16u::@return +mul16u::@return: scope:[mul16u] from mul16u::@1 + [17] return + to:@return +mul16u::@2: scope:[mul16u] from mul16u::@1 + [18] (byte~) mul16u::$1 ← (word) mul16u::a#2 & (byte) 1 + [19] if((byte~) mul16u::$1==(byte) 0) goto mul16u::@3 + to:mul16u::@4 +mul16u::@4: scope:[mul16u] from mul16u::@2 + [20] (dword) mul16u::res#1 ← (dword) mul16u::res#2 + (dword) mul16u::mb#2 + to:mul16u::@3 +mul16u::@3: scope:[mul16u] from mul16u::@2 mul16u::@4 + [21] (dword) mul16u::res#6 ← phi( mul16u::@2/(dword) mul16u::res#2 mul16u::@4/(dword) mul16u::res#1 ) + [22] (word) mul16u::a#0 ← (word) mul16u::a#2 >> (byte) 1 + [23] (dword) mul16u::mb#1 ← (dword) mul16u::mb#2 << (byte) 1 + to:mul16u::@1 + + +VARIABLE REGISTER WEIGHTS +(byte*) BGCOL +(byte*) BORDERCOL +(void()) main() +(byte~) main::$2 4.0 +(byte~) main::$3 4.0 +(word) main::kaputt +(word) main::kaputt#0 2.0 +(dword) main::result +(dword) main::result#0 4.0 +(dword()) mul16u((word) mul16u::a , (word) mul16u::b) +(byte~) mul16u::$1 22.0 +(word) mul16u::a +(word) mul16u::a#0 11.0 +(word) mul16u::a#2 7.333333333333333 +(word) mul16u::b +(dword) mul16u::mb +(dword) mul16u::mb#1 22.0 +(dword) mul16u::mb#2 4.714285714285714 +(dword) mul16u::res +(dword) mul16u::res#1 22.0 +(dword) mul16u::res#2 5.833333333333333 +(dword) mul16u::res#6 11.0 +(dword) mul16u::return +(dword) mul16u::return#2 4.0 + +Initial phi equivalence classes +[ mul16u::a#2 mul16u::a#0 ] +[ mul16u::res#2 mul16u::res#6 mul16u::res#1 ] +[ mul16u::mb#2 mul16u::mb#1 ] +Added variable mul16u::return#2 to zero page equivalence class [ mul16u::return#2 ] +Added variable main::result#0 to zero page equivalence class [ main::result#0 ] +Added variable main::kaputt#0 to zero page equivalence class [ 
main::kaputt#0 ] +Added variable main::$2 to zero page equivalence class [ main::$2 ] +Added variable main::$3 to zero page equivalence class [ main::$3 ] +Added variable mul16u::$1 to zero page equivalence class [ mul16u::$1 ] +Complete equivalence classes +[ mul16u::a#2 mul16u::a#0 ] +[ mul16u::res#2 mul16u::res#6 mul16u::res#1 ] +[ mul16u::mb#2 mul16u::mb#1 ] +[ mul16u::return#2 ] +[ main::result#0 ] +[ main::kaputt#0 ] +[ main::$2 ] +[ main::$3 ] +[ mul16u::$1 ] +Allocated zp ZP_WORD:2 [ mul16u::a#2 mul16u::a#0 ] +Allocated zp ZP_DWORD:4 [ mul16u::res#2 mul16u::res#6 mul16u::res#1 ] +Allocated zp ZP_DWORD:8 [ mul16u::mb#2 mul16u::mb#1 ] +Allocated zp ZP_DWORD:12 [ mul16u::return#2 ] +Allocated zp ZP_DWORD:16 [ main::result#0 ] +Allocated zp ZP_WORD:20 [ main::kaputt#0 ] +Allocated zp ZP_BYTE:22 [ main::$2 ] +Allocated zp ZP_BYTE:23 [ main::$3 ] +Allocated zp ZP_BYTE:24 [ mul16u::$1 ] + +INITIAL ASM +Target platform is c64basic + // File Comments +// Program where loop-head optimization produces wrong return value +// Reported by Richard-William Loerakker + // Upstart +.pc = $801 "Basic" +:BasicUpstart(bbegin) +.pc = $80d "Program" + // Global Constants & labels + .label BORDERCOL = $d020 + .label BGCOL = $d021 + // @begin +bbegin: + // [1] phi from @begin to @1 [phi:@begin->@1] +b1_from_bbegin: + jmp b1 + // @1 +b1: + // [2] call main + // [4] phi from @1 to main [phi:@1->main] +main_from_b1: + jsr main + // [3] phi from @1 to @end [phi:@1->@end] +bend_from_b1: + jmp bend + // @end +bend: + // main +main: { + .label _2 = $16 + .label _3 = $17 + .label result = $10 + .label kaputt = $14 + // [5] call mul16u + // [14] phi from main to mul16u [phi:main->mul16u] + mul16u_from_main: + jsr mul16u + // [6] (dword) mul16u::return#2 ← (dword) mul16u::res#2 -- vduz1=vduz2 + lda.z mul16u.res + sta.z mul16u.return + lda.z mul16u.res+1 + sta.z mul16u.return+1 + lda.z mul16u.res+2 + sta.z mul16u.return+2 + lda.z mul16u.res+3 + sta.z mul16u.return+3 + jmp b1 + // main::@1 + 
b1: + // [7] (dword) main::result#0 ← (dword) mul16u::return#2 -- vduz1=vduz2 + lda.z mul16u.return + sta.z result + lda.z mul16u.return+1 + sta.z result+1 + lda.z mul16u.return+2 + sta.z result+2 + lda.z mul16u.return+3 + sta.z result+3 + // [8] (word) main::kaputt#0 ← < (dword) main::result#0 -- vwuz1=_lo_vduz2 + lda.z result + sta.z kaputt + lda.z result+1 + sta.z kaputt+1 + // [9] (byte~) main::$2 ← < (word) main::kaputt#0 -- vbuz1=_lo_vwuz2 + lda.z kaputt + sta.z _2 + // [10] *((const byte*) BORDERCOL#0) ← (byte~) main::$2 -- _deref_pbuc1=vbuz1 + lda.z _2 + sta BORDERCOL + // [11] (byte~) main::$3 ← > (word) main::kaputt#0 -- vbuz1=_hi_vwuz2 + lda.z kaputt+1 + sta.z _3 + // [12] *((const byte*) BGCOL#0) ← (byte~) main::$3 -- _deref_pbuc1=vbuz1 + lda.z _3 + sta BGCOL + jmp breturn + // main::@return + breturn: + // [13] return + rts +} + // mul16u +// Perform binary multiplication of two unsigned 16-bit words into a 32-bit unsigned double word +// mul16u(word zeropage(2) a) +mul16u: { + .const b = $7b + .label _1 = $18 + .label a = 2 + .label mb = 8 + .label res = 4 + .label return = $c + // [15] phi from mul16u to mul16u::@1 [phi:mul16u->mul16u::@1] + b1_from_mul16u: + // [15] phi (dword) mul16u::mb#2 = (const word) mul16u::b#0 [phi:mul16u->mul16u::@1#0] -- vduz1=vduc1 + lda #<b + sta.z mb + lda #>b + sta.z mb+1 + lda #<b>>$10 + sta.z mb+2 + lda #>b>>$10 + sta.z mb+3 + // [15] phi (dword) mul16u::res#2 = (byte) 0 [phi:mul16u->mul16u::@1#1] -- vduz1=vbuc1 + lda #0 + sta.z res + lda #0 + sta.z res+1 + sta.z res+2 + sta.z res+3 + // [15] phi (word) mul16u::a#2 = (byte) 4 [phi:mul16u->mul16u::@1#2] -- vwuz1=vbuc1 + lda #<4 + sta.z a + lda #>4 + sta.z a+1 + jmp b1 + // mul16u::@1 + b1: + // [16] if((word) mul16u::a#2!=(byte) 0) goto mul16u::@2 -- vwuz1_neq_0_then_la1 + lda.z a + bne b2 + lda.z a+1 + bne b2 + jmp breturn + // mul16u::@return + breturn: + // [17] return + rts + // mul16u::@2 + b2: + // [18] (byte~) mul16u::$1 ← (word) mul16u::a#2 & (byte) 1 -- vbuz1=vwuz2_band_vbuc1 + lda #1
+ and.z a + sta.z _1 + // [19] if((byte~) mul16u::$1==(byte) 0) goto mul16u::@3 -- vbuz1_eq_0_then_la1 + lda.z _1 + cmp #0 + beq b3_from_b2 + jmp b4 + // mul16u::@4 + b4: + // [20] (dword) mul16u::res#1 ← (dword) mul16u::res#2 + (dword) mul16u::mb#2 -- vduz1=vduz1_plus_vduz2 + lda.z res + clc + adc.z mb + sta.z res + lda.z res+1 + adc.z mb+1 + sta.z res+1 + lda.z res+2 + adc.z mb+2 + sta.z res+2 + lda.z res+3 + adc.z mb+3 + sta.z res+3 + // [21] phi from mul16u::@2 mul16u::@4 to mul16u::@3 [phi:mul16u::@2/mul16u::@4->mul16u::@3] + b3_from_b2: + b3_from_b4: + // [21] phi (dword) mul16u::res#6 = (dword) mul16u::res#2 [phi:mul16u::@2/mul16u::@4->mul16u::@3#0] -- register_copy + jmp b3 + // mul16u::@3 + b3: + // [22] (word) mul16u::a#0 ← (word) mul16u::a#2 >> (byte) 1 -- vwuz1=vwuz1_ror_1 + lsr.z a+1 + ror.z a + // [23] (dword) mul16u::mb#1 ← (dword) mul16u::mb#2 << (byte) 1 -- vduz1=vduz1_rol_1 + asl.z mb + rol.z mb+1 + rol.z mb+2 + rol.z mb+3 + // [15] phi from mul16u::@3 to mul16u::@1 [phi:mul16u::@3->mul16u::@1] + b1_from_b3: + // [15] phi (dword) mul16u::mb#2 = (dword) mul16u::mb#1 [phi:mul16u::@3->mul16u::@1#0] -- register_copy + // [15] phi (dword) mul16u::res#2 = (dword) mul16u::res#6 [phi:mul16u::@3->mul16u::@1#1] -- register_copy + // [15] phi (word) mul16u::a#2 = (word) mul16u::a#0 [phi:mul16u::@3->mul16u::@1#2] -- register_copy + jmp b1 +} + // File Data + +REGISTER UPLIFT POTENTIAL REGISTERS +Statement [6] (dword) mul16u::return#2 ← (dword) mul16u::res#2 [ mul16u::return#2 ] ( main:2 [ mul16u::return#2 ] ) always clobbers reg byte a +Statement [7] (dword) main::result#0 ← (dword) mul16u::return#2 [ main::result#0 ] ( main:2 [ main::result#0 ] ) always clobbers reg byte a +Statement [8] (word) main::kaputt#0 ← < (dword) main::result#0 [ main::kaputt#0 ] ( main:2 [ main::kaputt#0 ] ) always clobbers reg byte a +Statement [9] (byte~) main::$2 ← < (word) main::kaputt#0 [ main::kaputt#0 main::$2 ] ( main:2 [ main::kaputt#0 main::$2 ] ) always clobbers reg byte 
a +Statement [11] (byte~) main::$3 ← > (word) main::kaputt#0 [ main::$3 ] ( main:2 [ main::$3 ] ) always clobbers reg byte a +Statement [16] if((word) mul16u::a#2!=(byte) 0) goto mul16u::@2 [ mul16u::res#2 mul16u::a#2 mul16u::mb#2 ] ( main:2::mul16u:5 [ mul16u::res#2 mul16u::a#2 mul16u::mb#2 ] ) always clobbers reg byte a +Statement [18] (byte~) mul16u::$1 ← (word) mul16u::a#2 & (byte) 1 [ mul16u::res#2 mul16u::a#2 mul16u::mb#2 mul16u::$1 ] ( main:2::mul16u:5 [ mul16u::res#2 mul16u::a#2 mul16u::mb#2 mul16u::$1 ] ) always clobbers reg byte a +Statement [20] (dword) mul16u::res#1 ← (dword) mul16u::res#2 + (dword) mul16u::mb#2 [ mul16u::a#2 mul16u::mb#2 mul16u::res#1 ] ( main:2::mul16u:5 [ mul16u::a#2 mul16u::mb#2 mul16u::res#1 ] ) always clobbers reg byte a +Potential registers zp ZP_WORD:2 [ mul16u::a#2 mul16u::a#0 ] : zp ZP_WORD:2 , +Potential registers zp ZP_DWORD:4 [ mul16u::res#2 mul16u::res#6 mul16u::res#1 ] : zp ZP_DWORD:4 , +Potential registers zp ZP_DWORD:8 [ mul16u::mb#2 mul16u::mb#1 ] : zp ZP_DWORD:8 , +Potential registers zp ZP_DWORD:12 [ mul16u::return#2 ] : zp ZP_DWORD:12 , +Potential registers zp ZP_DWORD:16 [ main::result#0 ] : zp ZP_DWORD:16 , +Potential registers zp ZP_WORD:20 [ main::kaputt#0 ] : zp ZP_WORD:20 , +Potential registers zp ZP_BYTE:22 [ main::$2 ] : zp ZP_BYTE:22 , reg byte a , reg byte x , reg byte y , +Potential registers zp ZP_BYTE:23 [ main::$3 ] : zp ZP_BYTE:23 , reg byte a , reg byte x , reg byte y , +Potential registers zp ZP_BYTE:24 [ mul16u::$1 ] : zp ZP_BYTE:24 , reg byte a , reg byte x , reg byte y , + +REGISTER UPLIFT SCOPES +Uplift Scope [mul16u] 38.83: zp ZP_DWORD:4 [ mul16u::res#2 mul16u::res#6 mul16u::res#1 ] 26.71: zp ZP_DWORD:8 [ mul16u::mb#2 mul16u::mb#1 ] 22: zp ZP_BYTE:24 [ mul16u::$1 ] 18.33: zp ZP_WORD:2 [ mul16u::a#2 mul16u::a#0 ] 4: zp ZP_DWORD:12 [ mul16u::return#2 ] +Uplift Scope [main] 4: zp ZP_DWORD:16 [ main::result#0 ] 4: zp ZP_BYTE:22 [ main::$2 ] 4: zp ZP_BYTE:23 [ main::$3 ] 2: zp ZP_WORD:20 [ 
main::kaputt#0 ] +Uplift Scope [] + +Uplifting [mul16u] best 1617 combination zp ZP_DWORD:4 [ mul16u::res#2 mul16u::res#6 mul16u::res#1 ] zp ZP_DWORD:8 [ mul16u::mb#2 mul16u::mb#1 ] reg byte a [ mul16u::$1 ] zp ZP_WORD:2 [ mul16u::a#2 mul16u::a#0 ] zp ZP_DWORD:12 [ mul16u::return#2 ] +Uplifting [main] best 1605 combination zp ZP_DWORD:16 [ main::result#0 ] reg byte a [ main::$2 ] reg byte a [ main::$3 ] zp ZP_WORD:20 [ main::kaputt#0 ] +Uplifting [] best 1605 combination +Coalescing zero page register [ zp ZP_DWORD:4 [ mul16u::res#2 mul16u::res#6 mul16u::res#1 ] ] with [ zp ZP_DWORD:12 [ mul16u::return#2 ] ] - score: 1 +Coalescing zero page register [ zp ZP_DWORD:4 [ mul16u::res#2 mul16u::res#6 mul16u::res#1 mul16u::return#2 ] ] with [ zp ZP_DWORD:16 [ main::result#0 ] ] - score: 1 +Coalescing zero page register [ zp ZP_WORD:20 [ main::kaputt#0 ] ] with [ zp ZP_WORD:2 [ mul16u::a#2 mul16u::a#0 ] ] +Allocated (was zp ZP_DWORD:4) zp ZP_DWORD:2 [ mul16u::res#2 mul16u::res#6 mul16u::res#1 mul16u::return#2 main::result#0 ] +Allocated (was zp ZP_DWORD:8) zp ZP_DWORD:6 [ mul16u::mb#2 mul16u::mb#1 ] +Allocated (was zp ZP_WORD:20) zp ZP_WORD:10 [ main::kaputt#0 mul16u::a#2 mul16u::a#0 ] + +ASSEMBLER BEFORE OPTIMIZATION + // File Comments +// Program where loop-head optimization produces wrong return value +// Reported by Richard-William Loerakker + // Upstart +.pc = $801 "Basic" +:BasicUpstart(bbegin) +.pc = $80d "Program" + // Global Constants & labels + .label BORDERCOL = $d020 + .label BGCOL = $d021 + // @begin +bbegin: + // [1] phi from @begin to @1 [phi:@begin->@1] +b1_from_bbegin: + jmp b1 + // @1 +b1: + // [2] call main + // [4] phi from @1 to main [phi:@1->main] +main_from_b1: + jsr main + // [3] phi from @1 to @end [phi:@1->@end] +bend_from_b1: + jmp bend + // @end +bend: + // main +main: { + .label result = 2 + .label kaputt = $a + // [5] call mul16u + // [14] phi from main to mul16u [phi:main->mul16u] + mul16u_from_main: + jsr mul16u + // [6] (dword) 
mul16u::return#2 ← (dword) mul16u::res#2 + jmp b1 + // main::@1 + b1: + // [7] (dword) main::result#0 ← (dword) mul16u::return#2 + // [8] (word) main::kaputt#0 ← < (dword) main::result#0 -- vwuz1=_lo_vduz2 + lda.z result + sta.z kaputt + lda.z result+1 + sta.z kaputt+1 + // [9] (byte~) main::$2 ← < (word) main::kaputt#0 -- vbuaa=_lo_vwuz1 + lda.z kaputt + // [10] *((const byte*) BORDERCOL#0) ← (byte~) main::$2 -- _deref_pbuc1=vbuaa + sta BORDERCOL + // [11] (byte~) main::$3 ← > (word) main::kaputt#0 -- vbuaa=_hi_vwuz1 + lda.z kaputt+1 + // [12] *((const byte*) BGCOL#0) ← (byte~) main::$3 -- _deref_pbuc1=vbuaa + sta BGCOL + jmp breturn + // main::@return + breturn: + // [13] return + rts +} + // mul16u +// Perform binary multiplication of two unsigned 16-bit words into a 32-bit unsigned double word +// mul16u(word zeropage($a) a) +mul16u: { + .const b = $7b + .label a = $a + .label mb = 6 + .label res = 2 + .label return = 2 + // [15] phi from mul16u to mul16u::@1 [phi:mul16u->mul16u::@1] + b1_from_mul16u: + // [15] phi (dword) mul16u::mb#2 = (const word) mul16u::b#0 [phi:mul16u->mul16u::@1#0] -- vduz1=vduc1 + lda #<b + sta.z mb + lda #>b + sta.z mb+1 + lda #<b>>$10 + sta.z mb+2 + lda #>b>>$10 + sta.z mb+3 + // [15] phi (dword) mul16u::res#2 = (byte) 0 [phi:mul16u->mul16u::@1#1] -- vduz1=vbuc1 + lda #0 + sta.z res + lda #0 + sta.z res+1 + sta.z res+2 + sta.z res+3 + // [15] phi (word) mul16u::a#2 = (byte) 4 [phi:mul16u->mul16u::@1#2] -- vwuz1=vbuc1 + lda #<4 + sta.z a + lda #>4 + sta.z a+1 + jmp b1 + // mul16u::@1 + b1: + // [16] if((word) mul16u::a#2!=(byte) 0) goto mul16u::@2 -- vwuz1_neq_0_then_la1 + lda.z a + bne b2 + lda.z a+1 + bne b2 + jmp breturn + // mul16u::@return + breturn: + // [17] return + rts + // mul16u::@2 + b2: + // [18] (byte~) mul16u::$1 ← (word) mul16u::a#2 & (byte) 1 -- vbuaa=vwuz1_band_vbuc1 + lda #1 + and.z a + // [19] if((byte~) mul16u::$1==(byte) 0) goto mul16u::@3 -- vbuaa_eq_0_then_la1 + cmp #0 + beq b3_from_b2 + jmp b4 + // mul16u::@4 + b4: + // [20] (dword)
mul16u::res#1 ← (dword) mul16u::res#2 + (dword) mul16u::mb#2 -- vduz1=vduz1_plus_vduz2 + lda.z res + clc + adc.z mb + sta.z res + lda.z res+1 + adc.z mb+1 + sta.z res+1 + lda.z res+2 + adc.z mb+2 + sta.z res+2 + lda.z res+3 + adc.z mb+3 + sta.z res+3 + // [21] phi from mul16u::@2 mul16u::@4 to mul16u::@3 [phi:mul16u::@2/mul16u::@4->mul16u::@3] + b3_from_b2: + b3_from_b4: + // [21] phi (dword) mul16u::res#6 = (dword) mul16u::res#2 [phi:mul16u::@2/mul16u::@4->mul16u::@3#0] -- register_copy + jmp b3 + // mul16u::@3 + b3: + // [22] (word) mul16u::a#0 ← (word) mul16u::a#2 >> (byte) 1 -- vwuz1=vwuz1_ror_1 + lsr.z a+1 + ror.z a + // [23] (dword) mul16u::mb#1 ← (dword) mul16u::mb#2 << (byte) 1 -- vduz1=vduz1_rol_1 + asl.z mb + rol.z mb+1 + rol.z mb+2 + rol.z mb+3 + // [15] phi from mul16u::@3 to mul16u::@1 [phi:mul16u::@3->mul16u::@1] + b1_from_b3: + // [15] phi (dword) mul16u::mb#2 = (dword) mul16u::mb#1 [phi:mul16u::@3->mul16u::@1#0] -- register_copy + // [15] phi (dword) mul16u::res#2 = (dword) mul16u::res#6 [phi:mul16u::@3->mul16u::@1#1] -- register_copy + // [15] phi (word) mul16u::a#2 = (word) mul16u::a#0 [phi:mul16u::@3->mul16u::@1#2] -- register_copy + jmp b1 +} + // File Data + +ASSEMBLER OPTIMIZATIONS +Removing instruction jmp b1 +Removing instruction jmp bend +Removing instruction jmp b1 +Removing instruction jmp breturn +Removing instruction jmp b1 +Removing instruction jmp breturn +Removing instruction jmp b4 +Removing instruction jmp b3 +Succesful ASM optimization Pass5NextJumpElimination +Removing instruction lda #0 +Succesful ASM optimization Pass5UnnecesaryLoadElimination +Replacing label b3_from_b2 with b3 +Removing instruction b1_from_bbegin: +Removing instruction b1: +Removing instruction main_from_b1: +Removing instruction bend_from_b1: +Removing instruction b3_from_b2: +Removing instruction b3_from_b4: +Succesful ASM optimization Pass5RedundantLabelElimination +Removing instruction bend: +Removing instruction mul16u_from_main: +Removing instruction 
b1: +Removing instruction breturn: +Removing instruction b1_from_mul16u: +Removing instruction breturn: +Removing instruction b4: +Removing instruction b1_from_b3: +Succesful ASM optimization Pass5UnusedLabelElimination +Updating BasicUpstart to call main directly +Removing instruction jsr main +Succesful ASM optimization Pass5SkipBegin +Removing instruction bbegin: +Succesful ASM optimization Pass5UnusedLabelElimination + +FINAL SYMBOL TABLE +(label) @1 +(label) @begin +(label) @end +(byte*) BGCOL +(const byte*) BGCOL#0 BGCOL = (byte*) 53281 +(byte*) BORDERCOL +(const byte*) BORDERCOL#0 BORDERCOL = (byte*) 53280 +(void()) main() +(byte~) main::$2 reg byte a 4.0 +(byte~) main::$3 reg byte a 4.0 +(label) main::@1 +(label) main::@return +(word) main::kaputt +(word) main::kaputt#0 kaputt zp ZP_WORD:10 2.0 +(dword) main::result +(dword) main::result#0 result zp ZP_DWORD:2 4.0 +(dword()) mul16u((word) mul16u::a , (word) mul16u::b) +(byte~) mul16u::$1 reg byte a 22.0 +(label) mul16u::@1 +(label) mul16u::@2 +(label) mul16u::@3 +(label) mul16u::@4 +(label) mul16u::@return +(word) mul16u::a +(word) mul16u::a#0 a zp ZP_WORD:10 11.0 +(word) mul16u::a#2 a zp ZP_WORD:10 7.333333333333333 +(word) mul16u::b +(const word) mul16u::b#0 b = (byte) $7b +(dword) mul16u::mb +(dword) mul16u::mb#1 mb zp ZP_DWORD:6 22.0 +(dword) mul16u::mb#2 mb zp ZP_DWORD:6 4.714285714285714 +(dword) mul16u::res +(dword) mul16u::res#1 res zp ZP_DWORD:2 22.0 +(dword) mul16u::res#2 res zp ZP_DWORD:2 5.833333333333333 +(dword) mul16u::res#6 res zp ZP_DWORD:2 11.0 +(dword) mul16u::return +(dword) mul16u::return#2 return zp ZP_DWORD:2 4.0 + +zp ZP_DWORD:2 [ mul16u::res#2 mul16u::res#6 mul16u::res#1 mul16u::return#2 main::result#0 ] +zp ZP_DWORD:6 [ mul16u::mb#2 mul16u::mb#1 ] +zp ZP_WORD:10 [ main::kaputt#0 mul16u::a#2 mul16u::a#0 ] +reg byte a [ main::$2 ] +reg byte a [ main::$3 ] +reg byte a [ mul16u::$1 ] + + +FINAL ASSEMBLER +Score: 1399 + + // File Comments +// Program where loop-head optimization 
produces wrong return value +// Reported by Richard-William Loerakker + // Upstart +.pc = $801 "Basic" +:BasicUpstart(main) +.pc = $80d "Program" + // Global Constants & labels + .label BORDERCOL = $d020 + .label BGCOL = $d021 + // @begin + // [1] phi from @begin to @1 [phi:@begin->@1] + // @1 + // [2] call main + // [4] phi from @1 to main [phi:@1->main] + // [3] phi from @1 to @end [phi:@1->@end] + // @end + // main +main: { + .label result = 2 + .label kaputt = $a + // mul16u(4,123) + // [5] call mul16u + // [14] phi from main to mul16u [phi:main->mul16u] + jsr mul16u + // mul16u(4,123) + // [6] (dword) mul16u::return#2 ← (dword) mul16u::res#2 + // main::@1 + // result = mul16u(4,123) + // [7] (dword) main::result#0 ← (dword) mul16u::return#2 + // kaputt = kaputt + // [11] (byte~) main::$3 ← > (word) main::kaputt#0 -- vbuaa=_hi_vwuz1 + lda.z kaputt+1 + // *BGCOL = >kaputt + // [12] *((const byte*) BGCOL#0) ← (byte~) main::$3 -- _deref_pbuc1=vbuaa + sta BGCOL + // main::@return + // } + // [13] return + rts +} + // mul16u +// Perform binary multiplication of two unsigned 16-bit words into a 32-bit unsigned double word +// mul16u(word zeropage($a) a) +mul16u: { + .const b = $7b + .label a = $a + .label mb = 6 + .label res = 2 + .label return = 2 + // [15] phi from mul16u to mul16u::@1 [phi:mul16u->mul16u::@1] + // [15] phi (dword) mul16u::mb#2 = (const word) mul16u::b#0 [phi:mul16u->mul16u::@1#0] -- vduz1=vduc1 + lda #b + sta.z mb+1 + lda #>$10 + sta.z mb+2 + lda #>b>>$10 + sta.z mb+3 + // [15] phi (dword) mul16u::res#2 = (byte) 0 [phi:mul16u->mul16u::@1#1] -- vduz1=vbuc1 + lda #0 + sta.z res + sta.z res+1 + sta.z res+2 + sta.z res+3 + // [15] phi (word) mul16u::a#2 = (byte) 4 [phi:mul16u->mul16u::@1#2] -- vwuz1=vbuc1 + lda #<4 + sta.z a + lda #>4 + sta.z a+1 + // mul16u::@1 + b1: + // while(a!=0) + // [16] if((word) mul16u::a#2!=(byte) 0) goto mul16u::@2 -- vwuz1_neq_0_then_la1 + lda.z a + bne b2 + lda.z a+1 + bne b2 + // mul16u::@return + // } + // [17] return + 
rts + // mul16u::@2 + b2: + // a&1 + // [18] (byte~) mul16u::$1 ← (word) mul16u::a#2 & (byte) 1 -- vbuaa=vwuz1_band_vbuc1 + lda #1 + and.z a + // if( (a&1) != 0) + // [19] if((byte~) mul16u::$1==(byte) 0) goto mul16u::@3 -- vbuaa_eq_0_then_la1 + cmp #0 + beq b3 + // mul16u::@4 + // res = res + mb + // [20] (dword) mul16u::res#1 ← (dword) mul16u::res#2 + (dword) mul16u::mb#2 -- vduz1=vduz1_plus_vduz2 + lda.z res + clc + adc.z mb + sta.z res + lda.z res+1 + adc.z mb+1 + sta.z res+1 + lda.z res+2 + adc.z mb+2 + sta.z res+2 + lda.z res+3 + adc.z mb+3 + sta.z res+3 + // [21] phi from mul16u::@2 mul16u::@4 to mul16u::@3 [phi:mul16u::@2/mul16u::@4->mul16u::@3] + // [21] phi (dword) mul16u::res#6 = (dword) mul16u::res#2 [phi:mul16u::@2/mul16u::@4->mul16u::@3#0] -- register_copy + // mul16u::@3 + b3: + // a = a>>1 + // [22] (word) mul16u::a#0 ← (word) mul16u::a#2 >> (byte) 1 -- vwuz1=vwuz1_ror_1 + lsr.z a+1 + ror.z a + // mb = mb<<1 + // [23] (dword) mul16u::mb#1 ← (dword) mul16u::mb#2 << (byte) 1 -- vduz1=vduz1_rol_1 + asl.z mb + rol.z mb+1 + rol.z mb+2 + rol.z mb+3 + // [15] phi from mul16u::@3 to mul16u::@1 [phi:mul16u::@3->mul16u::@1] + // [15] phi (dword) mul16u::mb#2 = (dword) mul16u::mb#1 [phi:mul16u::@3->mul16u::@1#0] -- register_copy + // [15] phi (dword) mul16u::res#2 = (dword) mul16u::res#6 [phi:mul16u::@3->mul16u::@1#1] -- register_copy + // [15] phi (word) mul16u::a#2 = (word) mul16u::a#0 [phi:mul16u::@3->mul16u::@1#2] -- register_copy + jmp b1 +} + // File Data + diff --git a/src/test/ref/loophead-problem-3.sym b/src/test/ref/loophead-problem-3.sym new file mode 100644 index 000000000..8927fbd65 --- /dev/null +++ b/src/test/ref/loophead-problem-3.sym @@ -0,0 +1,44 @@ +(label) @1 +(label) @begin +(label) @end +(byte*) BGCOL +(const byte*) BGCOL#0 BGCOL = (byte*) 53281 +(byte*) BORDERCOL +(const byte*) BORDERCOL#0 BORDERCOL = (byte*) 53280 +(void()) main() +(byte~) main::$2 reg byte a 4.0 +(byte~) main::$3 reg byte a 4.0 +(label) main::@1 +(label) main::@return 
+(word) main::kaputt +(word) main::kaputt#0 kaputt zp ZP_WORD:10 2.0 +(dword) main::result +(dword) main::result#0 result zp ZP_DWORD:2 4.0 +(dword()) mul16u((word) mul16u::a , (word) mul16u::b) +(byte~) mul16u::$1 reg byte a 22.0 +(label) mul16u::@1 +(label) mul16u::@2 +(label) mul16u::@3 +(label) mul16u::@4 +(label) mul16u::@return +(word) mul16u::a +(word) mul16u::a#0 a zp ZP_WORD:10 11.0 +(word) mul16u::a#2 a zp ZP_WORD:10 7.333333333333333 +(word) mul16u::b +(const word) mul16u::b#0 b = (byte) $7b +(dword) mul16u::mb +(dword) mul16u::mb#1 mb zp ZP_DWORD:6 22.0 +(dword) mul16u::mb#2 mb zp ZP_DWORD:6 4.714285714285714 +(dword) mul16u::res +(dword) mul16u::res#1 res zp ZP_DWORD:2 22.0 +(dword) mul16u::res#2 res zp ZP_DWORD:2 5.833333333333333 +(dword) mul16u::res#6 res zp ZP_DWORD:2 11.0 +(dword) mul16u::return +(dword) mul16u::return#2 return zp ZP_DWORD:2 4.0 + +zp ZP_DWORD:2 [ mul16u::res#2 mul16u::res#6 mul16u::res#1 mul16u::return#2 main::result#0 ] +zp ZP_DWORD:6 [ mul16u::mb#2 mul16u::mb#1 ] +zp ZP_WORD:10 [ main::kaputt#0 mul16u::a#2 mul16u::a#0 ] +reg byte a [ main::$2 ] +reg byte a [ main::$3 ] +reg byte a [ mul16u::$1 ]