From 72f1a779f28227615cfbfbec9cd95a30f2f66557 Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Sat, 4 Nov 2023 13:51:18 +0100 Subject: [PATCH] optimize monogfx.fill() and gfx2.fill(), also don't read outside screen area --- compiler/res/prog8lib/cx16/gfx2.p8 | 19 +++++++++++++++---- compiler/res/prog8lib/cx16/monogfx.p8 | 19 +++++++++++++++---- compiler/res/prog8lib/virtual/monogfx.p8 | 19 +++++++++++++++---- docs/source/todo.rst | 3 +-- virtualmachine/src/prog8/vm/GraphicsWindow.kt | 4 ++-- 5 files changed, 48 insertions(+), 16 deletions(-) diff --git a/compiler/res/prog8lib/cx16/gfx2.p8 b/compiler/res/prog8lib/cx16/gfx2.p8 index 99727590a..d1a133845 100644 --- a/compiler/res/prog8lib/cx16/gfx2.p8 +++ b/compiler/res/prog8lib/cx16/gfx2.p8 @@ -500,11 +500,13 @@ gfx2 { } } - sub fill(word @zp xx, word @zp yy, ubyte new_color) { + sub fill(uword x, uword y, ubyte new_color) { ; Non-recursive scanline flood fill. ; based loosely on code found here https://www.codeproject.com/Articles/6017/QuickFill-An-efficient-flood-fill-algorithm ; with the fixes applied to the seedfill_4 routine as mentioned in the comments. const ubyte MAXDEPTH = 48 + word @zp xx = x as word + word @zp yy = y as word word[MAXDEPTH] @split @shared stack_xl word[MAXDEPTH] @split @shared stack_xr word[MAXDEPTH] @split @shared stack_y @@ -584,8 +586,11 @@ gfx2 { pop_stack() xx = x1 ; TODO: if mode==1 (256c) use vera autodecrement instead of pget(), but code bloat not worth it? - while xx >= 0 and pget(xx as uword, yy as uword) == cx16.r11L + while xx >= 0 { + if pget(xx as uword, yy as uword) != cx16.r11L + break xx-- + } if x1!=xx horizontal_line(xx as uword+1, yy as uword, x1-xx as uword, cx16.r10L) else @@ -599,8 +604,11 @@ gfx2 { do { cx16.r9 = xx ; TODO: if mode==1 (256c) use vera autoincrement instead of pget(), but code bloat not worth it? - while xx <= width-1 and pget(xx as uword, yy as uword) == cx16.r11L + while xx <= width-1 { + if pget(xx as uword, yy as uword) != cx16.r11L + break xx++ + } if cx16.r9!=xx horizontal_line(cx16.r9, yy as uword, (xx as uword)-cx16.r9, cx16.r10L) @@ -609,8 +617,11 @@ gfx2 { push_stack(x2 + 1, xx - 1, yy, -dy) skip: xx++ - while xx <= x2 and pget(xx as uword, yy as uword) != cx16.r11L + while xx <= x2 { + if pget(xx as uword, yy as uword) == cx16.r11L + break xx++ + } left = xx } until xx>x2 } diff --git a/compiler/res/prog8lib/cx16/monogfx.p8 b/compiler/res/prog8lib/cx16/monogfx.p8 index 21e21dd17..92257c2f1 100644 --- a/compiler/res/prog8lib/cx16/monogfx.p8 +++ b/compiler/res/prog8lib/cx16/monogfx.p8 @@ -525,11 +525,13 @@ _done }} } - sub fill(word @zp xx, word @zp yy, bool draw) { + sub fill(uword x, uword y, bool draw) { ; Non-recursive scanline flood fill. ; based loosely on code found here https://www.codeproject.com/Articles/6017/QuickFill-An-efficient-flood-fill-algorithm ; with the fixes applied to the seedfill_4 routine as mentioned in the comments. const ubyte MAXDEPTH = 48 + word @zp xx = x as word + word @zp yy = y as word word[MAXDEPTH] @split @shared stack_xl word[MAXDEPTH] @split @shared stack_xr word[MAXDEPTH] @split @shared stack_y @@ -608,8 +610,11 @@ _done while cx16.r12L { pop_stack() xx = x1 - while xx >= 0 and pget(xx as uword, yy as uword) == cx16.r11L + while xx >= 0 { + if pget(xx as uword, yy as uword) != cx16.r11L + break xx-- + } if x1!=xx horizontal_line(xx as uword+1, yy as uword, x1-xx as uword, cx16.r10L) else @@ -622,8 +627,11 @@ _done do { cx16.r9 = xx - while xx <= width-1 and pget(xx as uword, yy as uword) == cx16.r11L + while xx <= width-1 { + if pget(xx as uword, yy as uword) != cx16.r11L + break xx++ + } if cx16.r9!=xx horizontal_line(cx16.r9, yy as uword, (xx as uword)-cx16.r9, cx16.r10L) @@ -632,8 +640,11 @@ _done push_stack(x2 + 1, xx - 1, yy, -dy) skip: xx++ - while xx <= x2 and pget(xx as uword, yy as uword) != cx16.r11L + while xx <= x2 { + if pget(xx as uword, yy as uword) == cx16.r11L + break xx++ + } left = xx } until xx>x2 } diff --git a/compiler/res/prog8lib/virtual/monogfx.p8 b/compiler/res/prog8lib/virtual/monogfx.p8 index a6a6a698c..1bcf1446a 100644 --- a/compiler/res/prog8lib/virtual/monogfx.p8 +++ b/compiler/res/prog8lib/virtual/monogfx.p8 @@ -259,11 +259,13 @@ monogfx { return sys.gfx_getpixel(xx, yy) } - sub fill(word @zp xx, word @zp yy, bool draw) { + sub fill(uword x, uword y, bool draw) { ; Non-recursive scanline flood fill. ; based loosely on code found here https://www.codeproject.com/Articles/6017/QuickFill-An-efficient-flood-fill-algorithm ; with the fixes applied to the seedfill_4 routine as mentioned in the comments. const ubyte MAXDEPTH = 48 + word @zp xx = x as word + word @zp yy = y as word word[MAXDEPTH] @split @shared stack_xl word[MAXDEPTH] @split @shared stack_xr word[MAXDEPTH] @split @shared stack_y @@ -304,8 +306,11 @@ monogfx { while cx16.r12L { pop_stack() xx = x1 - while xx >= 0 and pget(xx as uword, yy as uword) == cx16.r11L + while xx >= 0 { + if pget(xx as uword, yy as uword) != cx16.r11L + break xx-- + } if x1!=xx horizontal_line(xx as uword+1, yy as uword, x1-xx as uword, cx16.r10L) else @@ -318,8 +323,11 @@ monogfx { do { cx16.r9 = xx - while xx <= width-1 and pget(xx as uword, yy as uword) == cx16.r11L + while xx <= width-1 { + if pget(xx as uword, yy as uword) != cx16.r11L + break xx++ + } if cx16.r9!=xx horizontal_line(cx16.r9, yy as uword, (xx as uword)-cx16.r9, cx16.r10L) @@ -328,8 +336,11 @@ monogfx { push_stack(x2 + 1, xx - 1, yy, -dy) skip: xx++ - while xx <= x2 and pget(xx as uword, yy as uword) != cx16.r11L + while xx <= x2 { + if pget(xx as uword, yy as uword) == cx16.r11L + break xx++ + } left = xx } until xx>x2 } diff --git a/docs/source/todo.rst b/docs/source/todo.rst index 07c4a4255..bd54f57af 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -1,8 +1,7 @@ TODO ==== -- fix fill() to not access pixels outside of the screen (use virtual testmongfx first?) -- change fill() to use unsigned types for optimization, and re-check previous problem. +- what makes while xx <= x2 and pget(xx as uword, yy as uword) == cx16.r11L so large - [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 .... - [on branch: ir-less-branch-opcodes] IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction diff --git a/virtualmachine/src/prog8/vm/GraphicsWindow.kt b/virtualmachine/src/prog8/vm/GraphicsWindow.kt index d6c50f437..aebc427a2 100644 --- a/virtualmachine/src/prog8/vm/GraphicsWindow.kt +++ b/virtualmachine/src/prog8/vm/GraphicsWindow.kt @@ -68,9 +68,9 @@ class GraphicsWindow(val pixelWidth: Int, val pixelHeight: Int, val pixelScaling fun getpixel(x: Int, y: Int): Int { if(x<0 || x>=pixelWidth) - throw IllegalArgumentException("plot x outside of screen: $x") + throw IllegalArgumentException("getpixel x outside of screen: $x") if(y<0 || y>=pixelHeight) - throw IllegalArgumentException("plot y outside of screen: $y") + throw IllegalArgumentException("getpixel y outside of screen: $y") return image.getRGB(x, y) } }