strings: added next_token() which mimics C's strtok() routine

2026-04-24 05:25:49 +00:00 · 2026-01-25 00:09:52 +01:00
parent 4aa326d5ca
commit 5cbd4aafcc
3 changed files with 48 additions and 5 deletions
@@ -162,4 +162,31 @@ strings {
        sys.clear_carry()
        return 255
    }
+
+    sub next_token(str source, str delimiters) -> str {
+        ; -- Tokenize the source string according to the list of delimiter characters. Like C's ``strtok`` function.
+        ;    source = the string to tokenize, or 0 to continue with the string from the previous call.
+        ;    delimiters = 0-terminated list of delimiter characters.
+        ;    Returns a pointer to the next token, or 0 when there are no more tokens.
+        ;    Destructive: the delimiter that ends a token is overwritten with a 0 byte in the source string.
+        ;    Runs of delimiters (including leading and trailing ones) are skipped, so no empty tokens are returned.
+        if source != 0
+            last_token_source = source
+
+        ; nothing to continue with (0 was passed before any string was ever tokenized)
+        if last_token_source == 0
+            return 0
+
+        ^^ubyte dptr
+        bool at_delimiter
+
+        ; skip the delimiters in front of the token, like strtok does
+        repeat {
+            if last_token_source^^ == 0
+                return 0
+            at_delimiter = false
+            dptr = delimiters
+            while dptr^^ != 0 {
+                if last_token_source^^ == dptr^^ {
+                    at_delimiter = true
+                    break
+                }
+                dptr += 1
+            }
+            if not at_delimiter
+                break
+            last_token_source += 1
+        }
+
+        ; the token starts here; now find where it ends
+        source = last_token_source
+        while last_token_source^^ != 0 {
+            dptr = delimiters
+            while dptr^^ != 0 {
+                if last_token_source^^ == dptr^^ {
+                    ; terminate the token and remember where to continue on the next call
+                    last_token_source^^ = 0
+                    last_token_source += 1
+                    return source
+                }
+                dptr += 1
+            }
+            last_token_source += 1
+        }
+        ; reached the end of the string: this was the last token
+        return source
+    }
+    ^^ubyte last_token_source
+
 }
@@ -1160,6 +1160,13 @@ manipulation
    Copy a string to another, overwriting that one, but limited to the given length.
    Returns the length of the string that was copied.

+``next_token(str source, str delimiters) -> str``
+    Tokenize the source string according to the list of delimiter characters. Like C's ``strtok`` function.
+    You pass in the string to tokenize and the delimiters (the delimiter list must be 0-terminated too, so add a trailing 0 if you pass a byte array).
+    The routine returns a pointer to the next token (or to the first token, if you pass in a new string).
+    To get the following tokens, keep calling the routine but pass 0 for the source string (which tells it to continue
+    processing the previous string).  It returns 0 when there are no more tokens.
+    This is destructive: the delimiter that ends a token is overwritten with a 0 byte in the source string.
+
 ``right (source, length, target)``
    Copies the right side of the source string of the given length to target string.
    It is assumed the target string buffer is large enough to contain the result (which includes a terminating 0 byte).
@@ -1178,7 +1185,7 @@ manipulation
    Start and length must be within bounds of the source string.
    Writes in-place; doesn't return a value (so can't be used in an expression).

-``split (string, parts, max_parts)``
+``split (string, parts, max_parts) -> ubyte``
    Splits string into parts separated by white space (destructive).
    Pointers to each part are stored in the given parts array (sequential uwords, for instance in a @nosplit uword array), up to the given maximum number of parts.
    Returns the number of parts stored.
@@ -5,8 +5,18 @@

 main {
    sub start() {
-        uword[4] @nosplit parts
+        ^^ubyte sentence = "the quick brown fox jumps over the lazy dog."
+        ubyte[] whitespace = [ 9, 10, 13, 32, 160, 0 ]
+        txt.lowercase()

+        ^^ubyte token = strings.next_token(sentence, whitespace)
+        while token != 0 {
+            txt.print(token)
+            txt.nl()
+            token = strings.next_token(0, whitespace)
+        }
+
+        uword[4] @nosplit parts
        ubyte numparts

        numparts = strings.split(0, parts, len(parts))
@@ -18,14 +28,13 @@ main {
        numparts = strings.split("hello", parts, len(parts))
        printparts(numparts, parts)

-        numparts = strings.split("the quick brown fox jumps over the lazy dog", &parts, len(parts))
+        numparts = strings.split("the quick brown fox jumps over the lazy dog", parts, len(parts))
        printparts(numparts, parts)

-        numparts = strings.split("   the   quick   brown   fox  jumps  over  the  lazy  dog    ", &parts, len(parts))
+        numparts = strings.split("   the   quick   brown   fox  jumps  over  the  lazy  dog    ", parts, len(parts))
        printparts(numparts, parts)
    }

-
    sub printparts(ubyte numparts, ^^uword parts) {
        txt.print_ub(numparts)
        txt.print(" parts: ")