strings: added next_token() which mimics C's strtok() routine

This commit is contained in:
Irmen de Jong
2026-01-25 00:09:52 +01:00
parent 4aa326d5ca
commit 5cbd4aafcc
3 changed files with 48 additions and 5 deletions
@@ -162,4 +162,31 @@ strings {
sys.clear_carry()
return 255
}
sub next_token(str source, str delimiters) -> str {
    ; -- Tokenize the source string according to the list of delimiter characters. Like C's ``strtok`` function.
    ;    source:     the string to tokenize on the first call; pass 0 on following calls to continue with the same string.
    ;    delimiters: 0-terminated list of the characters that separate tokens.
    ;    Returns a pointer to the next token, or 0 when there are no more tokens.
    ;    Runs of delimiters (leading and trailing ones too) are skipped, so an empty token is never returned.
    ;    NOTE: the source string is modified in place: the delimiter that ends a token is overwritten with a 0 byte.
    ;    The scan position is kept in last_token_source, so only one string can be tokenized at a time.
    if source != 0
        last_token_source = source
    if last_token_source == 0
        return 0        ; asked to continue, but no string was supplied yet

    ; skip the delimiters in front of the token (strtok never yields empty tokens)
    while is_delimiter(last_token_source^^) {
        last_token_source += 1
    }
    if last_token_source^^ == 0
        return 0        ; only delimiters (or nothing at all) remained

    source = last_token_source      ; start of the token that is going to be returned
    while last_token_source^^ != 0 {
        if is_delimiter(last_token_source^^) {
            last_token_source^^ = 0     ; terminate the token in place
            last_token_source += 1      ; the next call resumes right after the delimiter
            return source
        }
        last_token_source += 1
    }
    return source       ; final token: it ends at the string's own 0 terminator

    sub is_delimiter(ubyte character) -> bool {
        ; -- true if character occurs in the delimiters list. The 0 terminator itself never matches.
        ^^ubyte dptr = delimiters
        while dptr^^ != 0 {
            if dptr^^ == character
                return true
            dptr += 1
        }
        return false
    }
}
^^ubyte last_token_source       ; scan position inside the string being tokenized; next_token() resumes from here when called with source == 0
}
+8 -1
View File
@@ -1160,6 +1160,13 @@ manipulation
Copy a string to another, overwriting that one, but limited to the given length.
Returns the length of the string that was copied.
``next_token(str source, str delimiters) -> str``
Tokenize the source string according to the list of delimiter characters. Like C's ``strtok`` function.
You pass in the string to tokenize and the delimiters (make sure the delimiters end with a trailing 0 as well).
The routine returns a pointer to the next token (or first token, if you pass in a new string).
To get the next tokens, keep calling the routine but pass 0 for the source string (which tells it to continue
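processing the previous string). It returns 0 when there are no more tokens.
Note that the source string is modified in the process (destructive): the delimiter that ends
each token is overwritten with a 0 byte.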
``right (source, length, target)``
Copies the right side of the source string of the given length to target string.
It is assumed the target string buffer is large enough to contain the result (which includes a terminating 0 byte).
@@ -1178,7 +1185,7 @@ manipulation
Start and length must be within bounds of the source string.
Writes in-place; doesn't return a value (so can't be used in an expression).
``split (string, parts, max_parts)``
``split (string, parts, max_parts) -> ubyte``
Splits string into parts separated by white space (destructive).
Pointers to each part are stored in the given parts array (sequential uwords, for instance in a @nosplit uword array), up to the given maximum number of parts.
Returns the number of parts stored.
+13 -4
View File
@@ -5,8 +5,18 @@
main {
sub start() {
uword[4] @nosplit parts
^^ubyte sentence = "the quick brown fox jumps over the lazy dog."
ubyte[] whitespace = [ 9, 10, 13, 32, 160, 0 ]
txt.lowercase()
^^ubyte token = strings.next_token(sentence, whitespace)
while token != 0 {
txt.print(token)
txt.nl()
token = strings.next_token(0, whitespace)
}
uword[4] @nosplit parts
ubyte numparts
numparts = strings.split(0, parts, len(parts))
@@ -18,14 +28,13 @@ main {
numparts = strings.split("hello", parts, len(parts))
printparts(numparts, parts)
numparts = strings.split("the quick brown fox jumps over the lazy dog", &parts, len(parts))
numparts = strings.split("the quick brown fox jumps over the lazy dog", parts, len(parts))
printparts(numparts, parts)
numparts = strings.split(" the quick brown fox jumps over the lazy dog ", &parts, len(parts))
numparts = strings.split(" the quick brown fox jumps over the lazy dog ", parts, len(parts))
printparts(numparts, parts)
}
sub printparts(ubyte numparts, ^^uword parts) {
txt.print_ub(numparts)
txt.print(" parts: ")