optimize certain ptr+value expression on 6502

This commit is contained in:
Irmen de Jong
2025-08-24 03:54:52 +02:00
parent e2a2db1256
commit 8e53c83844
7 changed files with 336 additions and 243 deletions
@@ -325,6 +325,7 @@ class DataType private constructor(val base: BaseDataType, val sub: BaseDataType
fun largerSizeThan(other: DataType): Boolean = base.largerSizeThan(other.base)
fun equalsSize(other: DataType): Boolean = base.equalsSize(other.base)
// note: for pointer types, size() doesn't return the size of the pointer itself but the size of the thing it points to
fun size(memsizer: IMemSizer): Int = if(sub!=null) {
memsizer.memorySize(sub)
} else if(subType!=null) {
@@ -2277,7 +2277,7 @@ $shortcutLabel:""")
private fun inplacemodificationWordWithVariable(name: String, dt: DataType, operator: String, otherName: String, valueDt: DataType, block: PtBlock?) {
require(dt.isWord)
require(valueDt.isInteger)
require(valueDt.isInteger || valueDt.isPointer)
when {
valueDt.isByte -> {
// the other variable is a BYTE type so optimize for that
@@ -348,6 +348,39 @@ class StatementOptimizer(private val program: Program,
}
}
// Pointer arithmetic for the 6502 target (every cpu except VIRTUAL).
// Rewrites a non-augmentable   x = pointer + value   (or   x = value + pointer)   into
//      x = value * <element size>
//      x += pointer
// It is only applied when the target is an unsigned word or a pointer-to-byte, and the
// pointer operand is a pointer to something other than a byte.
//
// Only "+" is rewritten. The split evaluates  <scaled value> OP pointer,  which equals the
// original  pointer OP <scaled value>  only for a commutative operator. For "-" it would
// compute  value*size - pointer  instead of  pointer - value*size,  so subtraction is
// deliberately left untouched here and handled by the regular (unsplit) code path.
if (options.compTarget.cpu != CpuType.VIRTUAL) {
    if(!assignment.isAugmentable && bexpr.operator=="+") {
        if(targetIDt.isUnsignedWord || targetIDt.getOrUndef().isPointerToByte) {
            val leftDt = bexpr.left.inferType(program).getOrUndef()
            val rightDt = bexpr.right.inferType(program).getOrUndef()

            // Replaces the value of assignment [a] with `value * size` and links the new
            // expression into the AST below [a].
            fun setSizedValue(a: Assignment, value: Expression, size: Int) {
                val sized = BinaryExpression(value, "*", NumericLiteral.optimalInteger(size, value.position), value.position)
                a.value = sized
                sized.linkParents(a)
            }

            if (leftDt.isPointer && !leftDt.isPointerToByte) {
                // uword x = pointer + value --> x = value * sizeof , x += pointer
                // note: for pointer types, size() yields the size of the pointed-to thing
                val size = leftDt.size(options.compTarget)
                setSizedValue(assignment, bexpr.right, size)
                val pointerAdd = BinaryExpression(assignment.target.toExpression(), bexpr.operator, bexpr.left, bexpr.position)
                val a2 = Assignment(assignment.target.copy(), pointerAdd, assignment.origin, assignment.position)
                return listOf(IAstModification.InsertAfter(assignment, a2, parent as IStatementContainer))
            } else if (rightDt.isPointer && !rightDt.isPointerToByte) {
                // uword x = value + pointer --> x = value * sizeof , x += pointer
                val size = rightDt.size(options.compTarget)
                setSizedValue(assignment, bexpr.left, size)
                assignment.linkParents(parent)
                val pointerAdd = BinaryExpression(assignment.target.toExpression(), bexpr.operator, bexpr.right, bexpr.position)
                val a2 = Assignment(assignment.target.copy(), pointerAdd, assignment.origin, assignment.position)
                return listOf(IAstModification.InsertAfter(assignment, a2, parent as IStatementContainer))
            }
        }
    }
}
}
// word = lsb(word)
+10 -10
View File
@@ -5,7 +5,7 @@ compression {
%option ignore_unused, merge
sub encode_rle_outfunc(^^ubyte data, uword size, uword output_function, bool is_last_block) {
sub encode_rle_outfunc(uword data, uword size, uword output_function, bool is_last_block) {
; -- Compress the given data block using ByteRun1 aka PackBits RLE encoding.
; output_function = address of a routine that gets a byte arg in A,
; which is the next RLE byte to write to the compressed output buffer or file.
@@ -35,7 +35,7 @@ compression {
sub output_literals() {
call_output_function(literals_length-1)
^^ubyte dataptr = data + literals_start_idx
uword dataptr = data + literals_start_idx
ubyte i
for i in 0 to literals_length-1 {
call_output_function(@(dataptr))
@@ -70,7 +70,7 @@ compression {
call_output_function(128)
}
sub encode_rle(^^ubyte data, uword size, ^^ubyte target, bool is_last_block) -> uword {
sub encode_rle(uword data, uword size, uword target, bool is_last_block) -> uword {
; -- Compress the given data block using ByteRun1 aka PackBits RLE encoding.
; Returns the size of the compressed RLE data. Worst case result storage size needed = (size + (size+126) / 127) + 1.
; is_last_block = usually true, but you can set it to false if you want to concatenate multiple
@@ -79,7 +79,7 @@ compression {
uword idx = 0
uword literals_start_idx = 0
ubyte literals_length = 0
^^ubyte orig_target = target
uword orig_target = target
sub next_same_span() {
; returns length in cx16.r1L, and the byte value in cx16.r1H
@@ -94,7 +94,7 @@ compression {
sub output_literals() {
@(target) = literals_length-1
target++
^^ubyte dataptr = data + literals_start_idx
uword dataptr = data + literals_start_idx
ubyte i
for i in 0 to literals_length-1 {
@(target) = @(dataptr)
@@ -136,7 +136,7 @@ compression {
return target-orig_target
}
asmsub decode_rle_srcfunc(uword source_function @AY, ^^ubyte target @R0, uword maxsize @R1) clobbers(X) -> uword @AY {
asmsub decode_rle_srcfunc(uword source_function @AY, uword target @R0, uword maxsize @R1) clobbers(X) -> uword @AY {
; -- Decodes "ByteRun1" (aka PackBits) RLE compressed data. Control byte value 128 ends the decoding.
; Also stops decompressing when the maxsize has been reached. Returns the size of the decompressed data.
; Instead of a source buffer, you provide a callback function that must return the next byte to decompress in A.
@@ -239,7 +239,7 @@ _end
}}
}
asmsub decode_rle(^^ubyte compressed @AY, ^^ubyte target @R0, uword maxsize @R1) clobbers(X) -> uword @AY {
asmsub decode_rle(uword compressed @AY, uword target @R0, uword maxsize @R1) clobbers(X) -> uword @AY {
; -- Decodes "ByteRun1" (aka PackBits) RLE compressed data. Control byte value 128 ends the decoding.
; Also stops decompressing if the maxsize has been reached.
; Returns the size of the decompressed data.
@@ -355,7 +355,7 @@ _end
}
asmsub decode_zx0(^^ubyte compressed @R0, ^^ubyte target @R1) clobbers(A,X,Y) {
asmsub decode_zx0(uword compressed @R0, uword target @R1) clobbers(A,X,Y) {
; Decompress a block of data compressed in the ZX0 format
; This can be produced using the "salvador" compressor with -classic
; It has faster decompression than LZSA and a better compression ratio as well.
@@ -624,7 +624,7 @@ zx0_gamma_done: tax ; Preserve bit-buffer.
}
asmsub decode_tscrunch(^^ubyte compressed @R0, ^^ubyte target @R1) clobbers(A,X,Y) {
asmsub decode_tscrunch(uword compressed @R0, uword target @R1) clobbers(A,X,Y) {
; Decompress a block of data compressed by TSCRUNCH
; see https://github.com/tonysavon/TSCrunch
; It has extremely fast decompression (approaching RLE speeds),
@@ -865,7 +865,7 @@ lzput = cx16.r3 ; 2 bytes
}
asmsub decode_tscrunch_inplace(^^ubyte compressed @R0) clobbers(A,X,Y) {
asmsub decode_tscrunch_inplace(uword compressed @R0) clobbers(A,X,Y) {
; Decompress a block of data compressed by TSCRUNCH *in place*
; This can save an extra memory buffer if you are reading crunched data from a file into a buffer.
; see https://github.com/tonysavon/TSCrunch
+8 -10
View File
@@ -66,21 +66,20 @@ _done
}
}
; TODO convert to ^^uword once code size regression is fixed
sub gnomesort_uw(uword @requirezp values, ubyte num_elements) {
; Sorts the values array (no-split unsigned words).
; Max number of elements is 128. Clobbers R0 and R1.
sub gnomesort_uw(uword @requirezp wordvalues, ubyte num_elements) {
; Sorts the wordvalues array (no-split unsigned words).
; Max number of elements is 128 to keep indexing code size small and fast. Clobbers R0 and R1.
ubyte @zp pos=2
num_elements *= 2
while pos != num_elements {
cx16.r1L = pos-2
if peekw(values+pos) >= peekw(values + cx16.r1L)
if peekw(wordvalues+pos) >= peekw(wordvalues + cx16.r1L)
pos += 2
else {
; swap elements
cx16.r0 = peekw(values + cx16.r1L)
pokew(values + cx16.r1L, peekw(values + pos))
pokew(values + pos, cx16.r0)
cx16.r0 = peekw(wordvalues + cx16.r1L)
pokew(wordvalues + cx16.r1L, peekw(wordvalues + pos))
pokew(wordvalues + pos, cx16.r0)
pos-=2
if_z
pos+=2
@@ -88,11 +87,10 @@ _done
}
}
; TODO convert to ^^uword once code size regression is fixed
sub gnomesort_by_uw(uword @requirezp uw_keys, uword wordvalues, ubyte num_elements) {
; Sorts the 'wordvalues' array according to the 'uw_keys' array (which also gets sorted, of course).
; both arrays should be no-split arrays of words. uw_keys are unsigned.
; Max number of elements is 128. Clobbers R0 and R1.
; Max number of elements is 128 to keep indexing code size small and fast. Clobbers R0 and R1.
ubyte @zp pos=2
num_elements *= 2
while pos != num_elements {
+3 -1
View File
@@ -15,7 +15,7 @@ STRUCTS and TYPED POINTERS (6502 codegen specific)
- scan through 6502 library modules to change untyped uword pointers to typed pointers; shared, cx16, c64, c128, pet32, custom targets
*shared:*
sorting (first fix code size issue)
(done) sorting (on hold because of code size issues)
(done) conv
(done) shared_cbm_diskio
(done) shared_cbm_textio_functions
@@ -32,6 +32,7 @@ STRUCTS and TYPED POINTERS (6502 codegen specific)
(done) strings
(done) test_stack
- update the docs about the libraries so they also use typed pointers where appropriate
- implement the TODOs in PointerAssignmentsGen.
- scan through 6502 examples to change untyped uword pointers to typed pointers
- fix code size regressions (if any left)
@@ -41,3 +42,4 @@ STRUCTS and TYPED POINTERS (6502 codegen specific)
- optimize the multiplications in assignAddressOfIndexedPointer()
- optimize the float copying in assignIndexedPointer() (also word?)
- implement some more struct instance assignments (via memcopy) in CodeDesugarer (see the TODO) (add to documentation as well, paragraph 'Structs')
- try to optimize pointer arithmetic used in peek/poke a bit more so the routines in sorting module can use typed pointers without increasing code size
+280 -221
View File
@@ -1,228 +1,287 @@
%option no_sysinit
%zeropage kernalsafe
%import textio
%import compression
%import math
%import sorting
%import strings
%import diskio
%import floats
%option no_sysinit
%zeropage floatsafe
main {
uword @requirezp @shared v1 = 10000
^^float @requirezp @shared v2 = 10000
; 6502: pointer + byteindex -> if the pointed-to value is larger than 1 byte: first assign byteindex * size (as a word constant, $000x), then add the pointer.
sub start() {
; test_compression()
test_sorting1()
test_sorting2()
; test_math()
; test_syslib()
; test_strings()
; test_conv()
; test_diskio()
; test_textio()
repeat {}
cx16.r9L = 10
; one() ; $e4
; one2() ; $ed
; one3() ; $ea
two() ; $f0
; two2() ; $ed
two3() ; $ea
}
; Experiment: untyped uword pointer (v1) plus a byte index that is scaled by hand
; with a word-sized constant ($0002), all written as one expression.
sub one() {
cx16.r0 = v1 + cx16.r9L*$0002
}
; Experiment: same operands as one(), but split into two statements:
; assign the uword pointer first, then add the hand-scaled byte index in place.
sub one2() {
cx16.r0 = v1
cx16.r0 += cx16.r9L*$0002
}
; Experiment: same operands as one(), split in the opposite order:
; assign the hand-scaled byte index first, then add the uword pointer in place.
sub one3() {
cx16.r0 = cx16.r9L*$0002
cx16.r0 += v1
}
; Experiment: typed pointer (v2 is a ^^float) plus a byte index in one expression;
; no scaling factor is written out in the source here.
sub two() {
cx16.r0 = v2 + cx16.r9L
}
; Experiment: assign the typed pointer v2 first, then add a hand-scaled byte index in place.
; NOTE(review): the scale factor here is $0002 while two3() uses $0005 with the same
; ^^float pointer - confirm whether the difference is intentional.
sub two2() {
cx16.r0 = v2
cx16.r0 += cx16.r9L*$0002
}
; Experiment: assign the hand-scaled byte index first, then add the typed pointer v2 in place.
; presumably $0005 is the size of the float that v2 points to - TODO confirm.
sub two3() {
cx16.r0 = cx16.r9L*$0005
cx16.r0 += v2
}
sub test_diskio() {
txt.print("--diskio--\n")
sys.memset(target, len(target), 0)
diskio.delete("derp.bin")
void diskio.f_open_w("derp.bin")
repeat 12
void diskio.f_write("derpderp123", 11)
diskio.f_close_w()
void diskio.f_open("derp.bin")
diskio.f_read(target, 60)
txt.print(target)
txt.nl()
}
ubyte[100] target
sub test_conv() {
txt.print("--conv--\n")
txt.print_b(-111)
txt.spc()
txt.print_ub(222)
txt.spc()
txt.print_uw(22222)
txt.spc()
txt.print_w(-22222)
txt.nl()
txt.print_ubbin(222, true)
txt.spc()
txt.print_ubhex(222, true)
txt.spc()
txt.print_uwbin(2222, true)
txt.spc()
txt.print_uwhex(2222, true)
txt.nl()
txt.print_ub0(1)
txt.spc()
txt.print_uw0(123)
txt.nl()
}
sub test_strings() {
txt.print("--strings--\n")
ubyte idx
bool found
idx, found = strings.rfind(source, '1')
txt.print_ub(idx)
txt.nl()
}
sub test_textio() {
txt.print("--textio--\n")
txt.print("enter some input: ")
void txt.input_chars(&target)
txt.print(target)
txt.nl()
}
sub test_syslib() {
txt.print("--syslib--\n")
sys.internal_stringcopy(source, target)
txt.print(target)
txt.nl()
sys.memset(target, sizeof(target), 0)
txt.print(target)
txt.nl()
sys.memcopy(source, target, len(source))
txt.print(target)
txt.nl()
sys.memsetw(&target as ^^uword, 20, $5051)
txt.print(target)
txt.nl()
txt.print_b(sys.memcmp(source, target, len(source)))
txt.nl()
}
sub test_sorting1() {
txt.print("--sorting (shell)--\n")
ubyte[] bytes1 = [77,33,44,99,11,55]
ubyte[] bytes2 = [77,33,44,99,11,55]
uword[] @nosplit values1 = [1,2,3,4,5,6]
uword[] @nosplit words1 = [777,333,444,999,111,555]
uword[] @nosplit words2 = [777,333,444,999,111,555]
uword[] @nosplit values2 = [1,2,3,4,5,6]
sorting.shellsort_ub(&bytes1, len(bytes1))
sorting.shellsort_by_ub(&bytes2, &values1, len(bytes2))
sorting.shellsort_uw(&words1, len(words1))
sorting.shellsort_by_uw(&words2, &values2, len(words2))
for cx16.r0L in bytes1 {
txt.print_ub(cx16.r0L)
txt.spc()
}
txt.nl()
for cx16.r0L in bytes2 {
txt.print_ub(cx16.r0L)
txt.spc()
}
txt.nl()
for cx16.r0 in values1 {
txt.print_uw(cx16.r0)
txt.spc()
}
txt.nl()
for cx16.r0 in words1 {
txt.print_uw(cx16.r0)
txt.spc()
}
txt.nl()
for cx16.r0 in words2 {
txt.print_uw(cx16.r0)
txt.spc()
}
txt.nl()
for cx16.r0 in values2 {
txt.print_uw(cx16.r0)
txt.spc()
}
txt.nl()
}
sub test_sorting2() {
txt.print("--sorting (gnome)--\n")
ubyte[] bytes1 = [77,33,44,99,11,55]
ubyte[] bytes2 = [77,33,44,99,11,55]
uword[] @nosplit values1 = [1,2,3,4,5,6]
uword[] @nosplit words1 = [777,333,444,999,111,555]
uword[] @nosplit words2 = [777,333,444,999,111,555]
uword[] @nosplit values2 = [1,2,3,4,5,6]
sorting.gnomesort_ub(&bytes1, len(bytes1))
sorting.gnomesort_by_ub(&bytes2, &values1, len(bytes2))
sorting.gnomesort_uw(&words1, len(words1))
sorting.gnomesort_by_uw(&words2, &values2, len(words2))
for cx16.r0L in bytes1 {
txt.print_ub(cx16.r0L)
txt.spc()
}
txt.nl()
for cx16.r0L in bytes2 {
txt.print_ub(cx16.r0L)
txt.spc()
}
txt.nl()
for cx16.r0 in values1 {
txt.print_uw(cx16.r0)
txt.spc()
}
txt.nl()
for cx16.r0 in words1 {
txt.print_uw(cx16.r0)
txt.spc()
}
txt.nl()
for cx16.r0 in words2 {
txt.print_uw(cx16.r0)
txt.spc()
}
txt.nl()
for cx16.r0 in values2 {
txt.print_uw(cx16.r0)
txt.spc()
}
txt.nl()
}
sub test_math() {
txt.print("--math--\n")
txt.print("expected 15567: ")
txt.print_uw(math.crc16(source, len(source)))
txt.print("\nexpected 8747,54089: ")
math.crc32(source, len(source))
txt.print_uw(cx16.r14)
txt.chrout(',')
txt.print_uw(cx16.r15)
txt.nl()
}
str source = petscii:"Lorem ipsuuuuuuuuuuuum dollllllllllllllloooooooor sit ametttttttttttttttt, cccccccccccccccconsecteeeeetuuuuuur aaaaaaaaa111111222222333333444444"
sub test_compression() {
txt.print("--compression--\n")
ubyte[256] compressed
ubyte[256] decompressed
txt.print_uw(len(source))
txt.nl()
uword size = compression.encode_rle(source, len(source), compressed, true)
txt.print_uw(size)
txt.nl()
size = compression.decode_rle(compressed, decompressed, sizeof(decompressed))
txt.print_uw(size)
txt.nl()
txt.print(source)
txt.nl()
txt.print(decompressed)
txt.nl()
}
; sub gnomesort_uw(uword @requirezp values) {
; cx16.r0 = values+cx16.r0L*2
; ;cx16.r0 = peekw(values+cx16.r0L)
;; if peekw(values+cx16.r2L) >= peekw(values + cx16.r1L)
;; cx16.r0L++
; }
; sub gnomesort_uw2(^^uword @requirezp values) {
; cx16.r0 = values+cx16.r0L
;; cx16.r0 = peekw(values+cx16.r0L)
;; if peekw(values+cx16.r2L) >= peekw(values + cx16.r1L)
;; cx16.r0L++
; }
}
;%option no_sysinit
;%zeropage kernalsafe
;%import textio
;%import compression
;%import math
;%import sorting
;%import strings
;%import diskio
;
;main {
; sub start() {
;; test_compression()
; test_sorting1()
; test_sorting2()
;; test_math()
;; test_syslib()
;; test_strings()
;; test_conv()
;; test_diskio()
;; test_textio()
;
; repeat {}
; }
;
; sub test_diskio() {
; txt.print("--diskio--\n")
; sys.memset(target, len(target), 0)
; diskio.delete("derp.bin")
; void diskio.f_open_w("derp.bin")
; repeat 12
; void diskio.f_write("derpderp123", 11)
; diskio.f_close_w()
;
; void diskio.f_open("derp.bin")
; diskio.f_read(target, 60)
; txt.print(target)
; txt.nl()
; }
;
; ubyte[100] target
;
; sub test_conv() {
; txt.print("--conv--\n")
; txt.print_b(-111)
; txt.spc()
; txt.print_ub(222)
; txt.spc()
; txt.print_uw(22222)
; txt.spc()
; txt.print_w(-22222)
; txt.nl()
; txt.print_ubbin(222, true)
; txt.spc()
; txt.print_ubhex(222, true)
; txt.spc()
; txt.print_uwbin(2222, true)
; txt.spc()
; txt.print_uwhex(2222, true)
; txt.nl()
; txt.print_ub0(1)
; txt.spc()
; txt.print_uw0(123)
; txt.nl()
; }
;
; sub test_strings() {
; txt.print("--strings--\n")
; ubyte idx
; bool found
; idx, found = strings.rfind(source, '1')
; txt.print_ub(idx)
; txt.nl()
; }
;
; sub test_textio() {
; txt.print("--textio--\n")
; txt.print("enter some input: ")
; void txt.input_chars(&target)
; txt.print(target)
; txt.nl()
; }
;
; sub test_syslib() {
; txt.print("--syslib--\n")
; sys.internal_stringcopy(source, target)
; txt.print(target)
; txt.nl()
; sys.memset(target, sizeof(target), 0)
; txt.print(target)
; txt.nl()
; sys.memcopy(source, target, len(source))
; txt.print(target)
; txt.nl()
; sys.memsetw(&target as ^^uword, 20, $5051)
; txt.print(target)
; txt.nl()
; txt.print_b(sys.memcmp(source, target, len(source)))
; txt.nl()
; }
;
; sub test_sorting1() {
; txt.print("--sorting (shell)--\n")
; ubyte[] bytes1 = [77,33,44,99,11,55]
; ubyte[] bytes2 = [77,33,44,99,11,55]
; uword[] @nosplit values1 = [1,2,3,4,5,6]
; uword[] @nosplit words1 = [777,333,444,999,111,555]
; uword[] @nosplit words2 = [777,333,444,999,111,555]
; uword[] @nosplit values2 = [1,2,3,4,5,6]
; sorting.shellsort_ub(&bytes1, len(bytes1))
; sorting.shellsort_by_ub(&bytes2, &values1, len(bytes2))
; sorting.shellsort_uw(&words1, len(words1))
; sorting.shellsort_by_uw(&words2, &values2, len(words2))
;
; for cx16.r0L in bytes1 {
; txt.print_ub(cx16.r0L)
; txt.spc()
; }
; txt.nl()
; for cx16.r0L in bytes2 {
; txt.print_ub(cx16.r0L)
; txt.spc()
; }
; txt.nl()
; for cx16.r0 in values1 {
; txt.print_uw(cx16.r0)
; txt.spc()
; }
; txt.nl()
; for cx16.r0 in words1 {
; txt.print_uw(cx16.r0)
; txt.spc()
; }
; txt.nl()
; for cx16.r0 in words2 {
; txt.print_uw(cx16.r0)
; txt.spc()
; }
; txt.nl()
; for cx16.r0 in values2 {
; txt.print_uw(cx16.r0)
; txt.spc()
; }
; txt.nl()
;
; }
;
; sub test_sorting2() {
; txt.print("--sorting (gnome)--\n")
; ubyte[] bytes1 = [77,33,44,99,11,55]
; ubyte[] bytes2 = [77,33,44,99,11,55]
; uword[] @nosplit values1 = [1,2,3,4,5,6]
; uword[] @nosplit words1 = [777,333,444,999,111,555]
; uword[] @nosplit words2 = [777,333,444,999,111,555]
; uword[] @nosplit values2 = [1,2,3,4,5,6]
; sorting.gnomesort_ub(&bytes1, len(bytes1))
; sorting.gnomesort_by_ub(&bytes2, &values1, len(bytes2))
; sorting.gnomesort_uw(&words1, len(words1))
; sorting.gnomesort_by_uw(&words2, &values2, len(words2))
;
; for cx16.r0L in bytes1 {
; txt.print_ub(cx16.r0L)
; txt.spc()
; }
; txt.nl()
; for cx16.r0L in bytes2 {
; txt.print_ub(cx16.r0L)
; txt.spc()
; }
; txt.nl()
; for cx16.r0 in values1 {
; txt.print_uw(cx16.r0)
; txt.spc()
; }
; txt.nl()
; for cx16.r0 in words1 {
; txt.print_uw(cx16.r0)
; txt.spc()
; }
; txt.nl()
; for cx16.r0 in words2 {
; txt.print_uw(cx16.r0)
; txt.spc()
; }
; txt.nl()
; for cx16.r0 in values2 {
; txt.print_uw(cx16.r0)
; txt.spc()
; }
; txt.nl()
;
; }
;
; sub test_math() {
; txt.print("--math--\n")
; txt.print("expected 15567: ")
; txt.print_uw(math.crc16(source, len(source)))
; txt.print("\nexpected 8747,54089: ")
; math.crc32(source, len(source))
; txt.print_uw(cx16.r14)
; txt.chrout(',')
; txt.print_uw(cx16.r15)
; txt.nl()
; }
;
; str source = petscii:"Lorem ipsuuuuuuuuuuuum dollllllllllllllloooooooor sit ametttttttttttttttt, cccccccccccccccconsecteeeeetuuuuuur aaaaaaaaa111111222222333333444444"
;
; sub test_compression() {
; txt.print("--compression--\n")
;
; ubyte[256] compressed
; ubyte[256] decompressed
;
; txt.print_uw(len(source))
; txt.nl()
;
; uword size = compression.encode_rle(source, len(source), compressed, true)
; txt.print_uw(size)
; txt.nl()
;
; size = compression.decode_rle(compressed, decompressed, sizeof(decompressed))
; txt.print_uw(size)
; txt.nl()
; txt.print(source)
; txt.nl()
; txt.print(decompressed)
; txt.nl()
; }
;}