From 24944ad49e29b99aed34be3f791ac9fd54254798 Mon Sep 17 00:00:00 2001
From: Irmen de Jong <irmen@razorvine.net>
Date: Wed, 7 Feb 2024 02:09:08 +0100
Subject: [PATCH] added string.strip() and string.trim() and l/r variants.
 fixed memsizer for pointers-to-ubyte.

---
 codeCore/src/prog8/code/target/AtariTarget.kt |  6 +-
 codeCore/src/prog8/code/target/VMTarget.kt    |  5 +-
 .../prog8/code/target/cbm/CbmMemorySizer.kt   |  5 +-
 compiler/res/prog8lib/string.p8               | 62 ++++++++++++++++
 compiler/res/prog8lib/virtual/string.p8       | 63 +++++++++++++++++
 docs/source/libraries.rst                     | 18 +++++
 docs/source/todo.rst                          |  3 +
 examples/test.p8                              | 70 ++++++++++++-------
 8 files changed, 203 insertions(+), 29 deletions(-)

diff --git a/codeCore/src/prog8/code/target/AtariTarget.kt b/codeCore/src/prog8/code/target/AtariTarget.kt
index b49ca6e77..1f7cb900c 100644
--- a/codeCore/src/prog8/code/target/AtariTarget.kt
+++ b/codeCore/src/prog8/code/target/AtariTarget.kt
@@ -23,5 +23,9 @@ class AtariTarget: ICompilationTarget, IStringEncoding by Encoder, IMemSizer {
     }
 
     override fun memorySize(arrayDt: DataType, numElements: Int) =
-        memorySize(ArrayToElementTypes.getValue(arrayDt)) * numElements
+        when(arrayDt) {
+            DataType.UWORD -> numElements    // a uword here is a pointer to bytes: one byte per element
+            else -> memorySize(ArrayToElementTypes.getValue(arrayDt)) * numElements    // array: element size times count
+        }
+
 }
diff --git a/codeCore/src/prog8/code/target/VMTarget.kt b/codeCore/src/prog8/code/target/VMTarget.kt
index 956fb2ef5..b0ca556a4 100644
--- a/codeCore/src/prog8/code/target/VMTarget.kt
+++ b/codeCore/src/prog8/code/target/VMTarget.kt
@@ -22,5 +22,8 @@ class VMTarget: ICompilationTarget, IStringEncoding by Encoder, IMemSizer {
     }
 
     override fun memorySize(arrayDt: DataType, numElements: Int) =
-        memorySize(ArrayToElementTypes.getValue(arrayDt)) * numElements
+        when(arrayDt) {
+            DataType.UWORD -> numElements    // a uword here is a pointer to bytes: one byte per element
+            else -> memorySize(ArrayToElementTypes.getValue(arrayDt)) * numElements    // array: element size times count
+        }
 }
\ No newline at end of file
diff --git a/codeCore/src/prog8/code/target/cbm/CbmMemorySizer.kt b/codeCore/src/prog8/code/target/cbm/CbmMemorySizer.kt
index f55a14215..3c2cba150 100644
--- a/codeCore/src/prog8/code/target/cbm/CbmMemorySizer.kt
+++ b/codeCore/src/prog8/code/target/cbm/CbmMemorySizer.kt
@@ -14,5 +14,8 @@ internal object CbmMemorySizer: IMemSizer {
     }
 
     override fun memorySize(arrayDt: DataType, numElements: Int) =
-        memorySize(ArrayToElementTypes.getValue(arrayDt)) * numElements
+        when(arrayDt) {
+            DataType.UWORD -> numElements    // a uword here is a pointer to bytes: one byte per element
+            else -> memorySize(ArrayToElementTypes.getValue(arrayDt)) * numElements    // array: element size times count
+        }
 }
\ No newline at end of file
diff --git a/compiler/res/prog8lib/string.p8 b/compiler/res/prog8lib/string.p8
index bd4739f2a..31d2a9462 100644
--- a/compiler/res/prog8lib/string.p8
+++ b/compiler/res/prog8lib/string.p8
@@ -386,6 +386,68 @@ fail    clc             ; yes, no match found, return with c=0
         }}
     }
 
+    sub strip(str s) {
+        ; -- removes whitespace and other non-visible characters from both edges of the string, modifying s in place
+        rstrip(s)       ; truncate the end first
+        lstrip(s)       ; then move the rest to the front
+    }
+
+    sub rstrip(str s) {
+        ; -- removes whitespace and other non-visible characters at the end of the string (in place); a string of only such characters becomes empty
+        cx16.r0L = length(s)            ; r0L = number of characters to keep (0 for an empty string: loop is skipped)
+        while cx16.r0L!=0 {
+            cx16.r1L = s[cx16.r0L-1]    ; last character that is still kept
+            if isprint(cx16.r1L) and not isspace(cx16.r1L)
+                break                   ; visible character found: it stays
+            cx16.r0L--
+        }
+        s[cx16.r0L] = 0                 ; terminate right after the last kept character
+    }
+
+    sub lstrip(str s) {
+        ; -- gets rid of whitespace and other non-visible characters at the start of the string (modifies s in place; uses cx16.r0L and cx16.r1L)
+        if s[0]==0
+            return
+        cx16.r0L = 255      ; start at 255 so the first increment lands on index 0 (byte wrap-around)
+        do {
+            cx16.r0L++
+            cx16.r1L = s[cx16.r0L]
+        } until cx16.r1L==0 or isprint(cx16.r1L) and not isspace(cx16.r1L)     ; stop at the terminator or at the first visible character
+        if cx16.r0L>0
+            copy(s+cx16.r0L, s)     ; move the remainder (from the first kept character) to the start of s
+    }
+
+    sub trim(str s) {
+        ; -- removes whitespace characters from both edges of the string, modifying s in place
+        rtrim(s)        ; truncate the end first
+        ltrim(s)        ; then move the rest to the front
+    }
+
+    sub rtrim(str s) {
+        ; -- removes whitespace characters at the end of the string (in place); an all-whitespace string becomes empty
+        cx16.r0L = length(s)            ; r0L = number of characters to keep (0 for an empty string: loop is skipped)
+        while cx16.r0L!=0 {
+            cx16.r1L = s[cx16.r0L-1]    ; last character that is still kept
+            if not isspace(cx16.r1L)
+                break                   ; non-whitespace character found: it stays
+            cx16.r0L--
+        }
+        s[cx16.r0L] = 0                 ; terminate right after the last kept character
+    }
+
+    sub ltrim(str s) {
+        ; -- gets rid of whitespace characters at the start of the string (modifies s in place; uses cx16.r0L and cx16.r1L)
+        if s[0]==0
+            return
+        cx16.r0L = 255      ; start at 255 so the first increment lands on index 0 (byte wrap-around)
+        do {
+            cx16.r0L++
+            cx16.r1L = s[cx16.r0L]
+        } until not isspace(cx16.r1L)       ; NOTE(review): relies on isspace(0) being false to stop at the terminator - confirm
+        if cx16.r0L>0
+            copy(s+cx16.r0L, s)     ; move the remainder (from the first kept character) to the start of s
+    }
+
     asmsub isdigit(ubyte petsciichar @A) -> bool @Pc {
         %asm {{
             cmp  #'0'
diff --git a/compiler/res/prog8lib/virtual/string.p8 b/compiler/res/prog8lib/virtual/string.p8
index c02ae5873..5a7a99c52 100644
--- a/compiler/res/prog8lib/virtual/string.p8
+++ b/compiler/res/prog8lib/virtual/string.p8
@@ -184,6 +184,69 @@ string {
         }
     }
 
+
+    sub strip(str s) {
+        ; -- removes whitespace and other non-visible characters from both edges of the string, modifying s in place
+        rstrip(s)       ; truncate the end first
+        lstrip(s)       ; then move the rest to the front
+    }
+
+    sub rstrip(str s) {
+        ; -- removes whitespace and other non-visible characters at the end of the string (in place); a string of only such characters becomes empty
+        cx16.r0L = length(s)            ; r0L = number of characters to keep (0 for an empty string: loop is skipped)
+        while cx16.r0L!=0 {
+            cx16.r1L = s[cx16.r0L-1]    ; last character that is still kept
+            if isprint(cx16.r1L) and not isspace(cx16.r1L)
+                break                   ; visible character found: it stays
+            cx16.r0L--
+        }
+        s[cx16.r0L] = 0                 ; terminate right after the last kept character
+    }
+
+    sub lstrip(str s) {
+        ; -- gets rid of whitespace and other non-visible characters at the start of the string (modifies s in place; uses cx16.r0L and cx16.r1L)
+        if s[0]==0
+            return
+        cx16.r0L = 255      ; start at 255 so the first increment lands on index 0 (byte wrap-around)
+        do {
+            cx16.r0L++
+            cx16.r1L = s[cx16.r0L]
+        } until cx16.r1L==0 or isprint(cx16.r1L) and not isspace(cx16.r1L)     ; stop at the terminator or at the first visible character
+        if cx16.r0L>0
+            copy(s+cx16.r0L, s)     ; move the remainder (from the first kept character) to the start of s
+    }
+
+    sub trim(str s) {
+        ; -- removes whitespace characters from both edges of the string, modifying s in place
+        rtrim(s)        ; truncate the end first
+        ltrim(s)        ; then move the rest to the front
+    }
+
+    sub rtrim(str s) {
+        ; -- removes whitespace characters at the end of the string (in place); an all-whitespace string becomes empty
+        cx16.r0L = length(s)            ; r0L = number of characters to keep (0 for an empty string: loop is skipped)
+        while cx16.r0L!=0 {
+            cx16.r1L = s[cx16.r0L-1]    ; last character that is still kept
+            if not isspace(cx16.r1L)
+                break                   ; non-whitespace character found: it stays
+            cx16.r0L--
+        }
+        s[cx16.r0L] = 0                 ; terminate right after the last kept character
+    }
+
+    sub ltrim(str s) {
+        ; -- gets rid of whitespace characters at the start of the string (modifies s in place; uses cx16.r0L and cx16.r1L)
+        if s[0]==0
+            return
+        cx16.r0L = 255      ; start at 255 so the first increment lands on index 0 (byte wrap-around)
+        do {
+            cx16.r0L++
+            cx16.r1L = s[cx16.r0L]
+        } until not isspace(cx16.r1L)       ; NOTE(review): relies on isspace(0) being false to stop at the terminator - confirm
+        if cx16.r0L>0
+            copy(s+cx16.r0L, s)     ; move the remainder (from the first kept character) to the start of s
+    }
+
     sub isdigit(ubyte character) -> bool {
         return character>='0' and character<='9'
     }
diff --git a/docs/source/libraries.rst b/docs/source/libraries.rst
index 31730b885..8e481691f 100644
--- a/docs/source/libraries.rst
+++ b/docs/source/libraries.rst
@@ -290,6 +290,24 @@ Provides string manipulation routines.
 ``upperchar (char)``
     Returns uppercased PETSCII character.
 
+``strip (string)``
+    Gets rid of whitespace and other non-visible characters at the edges of the string.
+
+``rstrip (string)``
+    Gets rid of whitespace and other non-visible characters at the end of the string.
+
+``lstrip (string)``
+    Gets rid of whitespace and other non-visible characters at the start of the string.
+
+``trim (string)``
+    Gets rid of whitespace characters at the edges of the string.
+
+``rtrim (string)``
+    Gets rid of whitespace characters at the end of the string.
+
+``ltrim (string)``
+    Gets rid of whitespace characters at the start of the string.
+
 ``isdigit (char)``
     Returns boolean if the character is a numerical digit 0-9
 
diff --git a/docs/source/todo.rst b/docs/source/todo.rst
index 25eba1273..870f3bacb 100644
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -1,6 +1,9 @@
 TODO
 ====
 
+Check ``&pointervar[x]`` (address-of an indexed pointer variable): the result doesn't look correct, at least in the IR.
+
+
 (after merge in boolean): move all "OperatorXinplace" from expressionGen to AssignmentGen, see if we can get rid of the Result return type.
 
 ...
diff --git a/examples/test.p8 b/examples/test.p8
index 77ec82e5b..be3451504 100644
--- a/examples/test.p8
+++ b/examples/test.p8
@@ -1,37 +1,55 @@
 %import textio
+%import string
+
 %zeropage basicsafe
 %option no_sysinit
 
 main {
     sub start() {
-        ubyte @shared xx=1
-        uword @shared yy
+        str name1 = ""                  ; empty string
+        str name2 = "hello \r\n"        ; only trailing whitespace
+        str name3 = "  \n\rhello"       ; only leading whitespace
+        str name4 = "  \n\r\xa0\xa0\xff\xffhello\x02\x02\x02  \n  "    ; whitespace plus \xa0, \xff and \x02 bytes around the word
 
-        ubyte[16] array
-        array[1] = 1
+        txt.print("strip:\n")
+        string.strip(name1)             ; each string is stripped in place...
+        txt.chrout('[')                 ; ...and then printed between [ and ]
+        txt.print(name1)
+        txt.print("]\n")
+        string.strip(name2)
+        txt.chrout('[')
+        txt.print(name2)
+        txt.print("]\n")
+        string.strip(name3)
+        txt.chrout('[')
+        txt.print(name3)
+        txt.print("]\n")
+        string.strip(name4)
+        txt.chrout('[')
+        txt.print(name4)
+        txt.print("]\n")
 
-        xx += 3
-        yy += 3
-        xx -= 3
-        yy -= 3
-
-        txt.print_ub(array[1])
-        txt.spc()
-        array[1]++
-        txt.print_ub(array[1])
-        txt.spc()
-        array[1]--
-        txt.print_ub(array[1])
-        txt.nl()
-
-        txt.print_ub(array[1])
-        txt.spc()
-        array[xx]++
-        txt.print_ub(array[1])
-        txt.spc()
-        array[xx]--
-        txt.print_ub(array[1])
-        txt.nl()
+        str tname1 = ""                 ; empty string
+        str tname2 = "hello \r\n"       ; only trailing whitespace
+        str tname3 = "  \n\r\x09hello"  ; only leading whitespace, including \x09
+        str tname4 = "  \n\x09\x0b\r\xa0\xa0\xff\xffhello\x05\x05\x05  \n  "    ; whitespace plus \xa0, \xff and \x05 bytes around the word
 
+        txt.print("trim:\n")
+        string.trim(tname1)             ; same sequence as above, now with trim()
+        txt.chrout('[')
+        txt.print(tname1)
+        txt.print("]\n")
+        string.trim(tname2)
+        txt.chrout('[')
+        txt.print(tname2)
+        txt.print("]\n")
+        string.trim(tname3)
+        txt.chrout('[')
+        txt.print(tname3)
+        txt.print("]\n")
+        string.trim(tname4)
+        txt.chrout('[')
+        txt.print(tname4)
+        txt.print("]\n")
     }
 }