From 4cd9bb8f9926ff4beb0874d30c22ebd8366b747d Mon Sep 17 00:00:00 2001
From: Irmen de Jong <irmen@razorvine.net>
Date: Sat, 23 Dec 2023 16:37:28 +0100
Subject: [PATCH] allow Python-style negative array indexing to count from the
 end

---
 .../codegen/intermediate/ExpressionGen.kt     |  2 +-
 .../compiler/astprocessing/AstChecker.kt      |  7 ++---
 .../compiler/astprocessing/VariousCleanups.kt | 15 +++++++++++
 docs/source/programming.rst                   |  2 ++
 docs/source/syntaxreference.rst               |  5 +++-
 docs/source/todo.rst                          |  1 -
 examples/test.p8                              | 26 ++++++++++++-------
 7 files changed, 42 insertions(+), 16 deletions(-)
diff --git a/codeGenIntermediate/src/prog8/codegen/intermediate/ExpressionGen.kt b/codeGenIntermediate/src/prog8/codegen/intermediate/ExpressionGen.kt
index 2aaa18ed6..aef5c8b88 100644
--- a/codeGenIntermediate/src/prog8/codegen/intermediate/ExpressionGen.kt
+++ b/codeGenIntermediate/src/prog8/codegen/intermediate/ExpressionGen.kt
@@ -170,7 +170,7 @@ internal class ExpressionGen(private val codeGen: IRCodeGen) {
         if(arrayIx.usesPointerVariable) {
             if(eltSize!=1)
                 throw AssemblyError("non-array var indexing requires bytes dt")
-            if(arrayIx.index.type!=DataType.UBYTE)
+            if(arrayIx.index.type !in ByteDatatypes)
                 throw AssemblyError("non-array var indexing requires bytes index")
             val tr = translateExpression(arrayIx.index)
             addToResult(result, tr, tr.resultReg, -1)
diff --git a/compiler/src/prog8/compiler/astprocessing/AstChecker.kt b/compiler/src/prog8/compiler/astprocessing/AstChecker.kt
index 39b2d4251..5bf3e59f8 100644
--- a/compiler/src/prog8/compiler/astprocessing/AstChecker.kt
+++ b/compiler/src/prog8/compiler/astprocessing/AstChecker.kt
@@ -1374,11 +1374,10 @@ internal class AstChecker(private val program: Program,
             if(target.datatype !in IterableDatatypes && target.datatype!=DataType.UWORD)
                 errors.err("indexing requires an iterable or address uword variable", arrayIndexedExpression.position)
             val arraysize = target.arraysize?.constIndex()
+            val index = arrayIndexedExpression.indexer.constIndex()
             if(arraysize!=null) {
-                // check out of bounds
-                val index = arrayIndexedExpression.indexer.constIndex()
                 if(index!=null && (index<0 || index>=arraysize))
-                    errors.err("array index out of bounds", arrayIndexedExpression.indexer.position)
+                    errors.err("index out of bounds", arrayIndexedExpression.indexer.position)
             } else if(target.datatype == DataType.STR) {
                 if(target.value is StringLiteral) {
                     // check string lengths for non-memory mapped strings
@@ -1387,6 +1386,8 @@ internal class AstChecker(private val program: Program,
                     if (index != null && (index < 0 || index >= stringLen))
                         errors.err("index out of bounds", arrayIndexedExpression.indexer.position)
                 }
+            } else if(index!=null && index<0) {
+                errors.err("index out of bounds", arrayIndexedExpression.indexer.position)
             }
         } else
             errors.err("indexing requires a variable to act upon", arrayIndexedExpression.position)
diff --git a/compiler/src/prog8/compiler/astprocessing/VariousCleanups.kt b/compiler/src/prog8/compiler/astprocessing/VariousCleanups.kt
index 7b3658af1..af912e809 100644
--- a/compiler/src/prog8/compiler/astprocessing/VariousCleanups.kt
+++ b/compiler/src/prog8/compiler/astprocessing/VariousCleanups.kt
@@ -262,5 +262,20 @@ internal class VariousCleanups(val program: Program, val errors: IErrorReporter,
         }
         return noModifications
     }
+
+    override fun after(arrayIndexedExpression: ArrayIndexedExpression, parent: Node): Iterable<IAstModification> {
+        // A constant negative index counts from the end: rewrite it in place when the array size is a known constant.
+        val indexer = arrayIndexedExpression.indexer
+        val fromEnd = indexer.constIndex()
+        if(fromEnd==null || fromEnd>=0)
+            return noModifications
+        val declaredSize = arrayIndexedExpression.arrayvar.targetVarDecl(program)?.arraysize?.constIndex()
+        if(declaredSize!=null) {
+            val fromStart = NumericLiteral.optimalNumeric(declaredSize+fromEnd, indexer.position)
+            indexer.indexExpr = fromStart
+            fromStart.linkParents(indexer)
+        }
+        return noModifications
+    }
 }
 
diff --git a/docs/source/programming.rst b/docs/source/programming.rst
index a387d5368..29a67a757 100644
--- a/docs/source/programming.rst
+++ b/docs/source/programming.rst
@@ -294,6 +294,7 @@ Here are some examples of arrays::
 
     value = array[3]            ; the fourth value in the array (index is 0-based)
     char = string[4]            ; the fifth character (=byte) in the string
+    char = string[-2]           ; the second-to-last character in the string (Python-style indexing from the end)
 
 .. note::
     Right now, the array should be small enough to be indexable by a single byte index.
@@ -329,6 +330,7 @@ An uword variable can be used in limited scenarios as a 'pointer' to a byte in m
 dynamic, location. You can use array indexing on a pointer variable to use it as a byte array at
 a dynamic location in memory: currently this is equivalent to directly referencing the bytes in
 memory at the given index. In contrast to a real array variable, the index value can be the size of a word.
+Unlike with array variables, you cannot use a negative index on a pointer variable to count from the end, because the size of the memory area it points to is unknown.
 See also :ref:`pointervars_programming`
 
 **LSB/MSB split word arrays:**
diff --git a/docs/source/syntaxreference.rst b/docs/source/syntaxreference.rst
index 3a107c080..6365d43dd 100644
--- a/docs/source/syntaxreference.rst
+++ b/docs/source/syntaxreference.rst
@@ -507,10 +507,13 @@ Array indexing
 ^^^^^^^^^^^^^^
 
 Strings and arrays are a sequence of values. You can access the individual values by indexing.
-Syntax is familiar with brackets:  ``arrayvar[x]`` ::
+A negative index counts from the end of the array rather than from the beginning: -1 refers to
+the last element in the array, -2 to the second-to-last, etc. (Python uses this same scheme.)
+Use brackets to index into an array:  ``arrayvar[x]`` ::
 
     array[2]        ; the third byte in the array (index is 0-based)
     string[4]       ; the fifth character (=byte) in the string
+    array[-2]       ; the second-to-last element
 
 Note: you can also use array indexing on a 'pointer variable', which is basically an uword variable
 containing a memory address. Currently this is equivalent to directly referencing the bytes in
diff --git a/docs/source/todo.rst b/docs/source/todo.rst
index e57027aa7..b6ed95eb0 100644
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -84,4 +84,3 @@ Other language/syntax features to think about
   challenges: how to not make this too X16 specific? How does the compiler know what bank to switch (ram/rom)?
   How to make it performant when we want to (i.e. NOT have it use callfar/auto bank switching) ?
 - chained comparisons   `10<x<20` ,   `x==y==z`   (desugars to  `10<x and x<20`,   `x==y and y==z`) BUT this changes the semantics of what it is right now ! (x==(y==z) --> x==true)
-- negative array index to refer to an element from the end of the array.  Python `[-1]` or Raku syntax `[\*-1]`  , `[\*/2]` .... \*=size of the array
diff --git a/examples/test.p8 b/examples/test.p8
index 057765301..eff3792b8 100644
--- a/examples/test.p8
+++ b/examples/test.p8
@@ -3,16 +3,22 @@
 
 main {
     sub start() {
-        uword function = &test
-        uword @shared derp = call(function)
-        txt.print_uw(derp)
-        txt.nl()
-        void call(function)
-    }
+        ubyte[] barr = [1,2,3,4,5,6,7,8,9]
+        uword[] warr = [111,222,333,444,555,666,777,888,999]
+        uword pointer = &barr
+        byte index = 2
 
-    sub test() -> uword {
-        txt.print("test\n")
-        cx16.r0++
-        return 999
+        txt.print_ub(barr[7])
+        txt.nl()
+        txt.print_ub(barr[-2])
+        txt.nl()
+        txt.print_ub(pointer[7])
+        txt.nl()
+        txt.print_ub(pointer[index])
+        txt.nl()
+        txt.print_uw(warr[7])
+        txt.nl()
+        txt.print_uw(warr[-2])
+        txt.nl()
     }
 }