prog8/compiler/res/prog8lib/virtual/string.p8
2024-08-24 14:53:18 +02:00

207 lines
7.3 KiB
Lua
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

; 0-terminated string manipulation routines. For the Virtual Machine target.
%import shared_string_functions
string {
%option ignore_unused
sub length(str st) -> ubyte {
; Returns the number of bytes in the string.
; This value is determined during runtime and counts upto the first terminating 0 byte in the string,
; regardless of the size of the string during compilation time. Dont confuse this with len and sizeof!
ubyte count = 0
while st[count]!=0
count++
return count
}
sub left(str source, ubyte slen, str target) {
; Copies the left side of the source string of the given length to target string.
; It is assumed the target string buffer is large enough to contain the result.
; Also, you have to make sure yourself that length is smaller or equal to the length of the source string.
; Modifies in-place, doesnt return a value (so cant be used in an expression).
target[slen] = 0
ubyte ix
for ix in 0 to slen-1 {
target[ix] = source[ix]
}
}
sub right(str source, ubyte slen, str target) {
; Copies the right side of the source string of the given length to target string.
; It is assumed the target string buffer is large enough to contain the result.
; Also, you have to make sure yourself that length is smaller or equal to the length of the source string.
; Modifies in-place, doesnt return a value (so cant be used in an expression).
ubyte offset = length(source)-slen
ubyte ix
for ix in 0 to slen-1 {
target[ix] = source[ix+offset]
}
target[ix]=0
}
sub slice(str source, ubyte start, ubyte slen, str target) {
; Copies a segment from the source string, starting at the given index,
; and of the given length to target string.
; It is assumed the target string buffer is large enough to contain the result.
; Also, you have to make sure yourself that start and length are within bounds of the strings.
; Modifies in-place, doesnt return a value (so cant be used in an expression).
ubyte ix
for ix in 0 to slen-1 {
target[ix] = source[ix+start]
}
target[ix]=0
}
sub find(str st, ubyte character) -> ubyte {
; Locates the first position of the given character in the string,
; returns Carry set if found + index in A, or Carry clear if not found (and A will be 255, an invalid index).
; NOTE: because this isn't an asmsub, there's only a SINGLE return value here. On the c64/cx16 targets etc there are 2 return values.
ubyte ix
for ix in 0 to length(st)-1 {
if st[ix]==character {
sys.set_carry()
return ix
}
}
sys.clear_carry()
return 255
}
sub rfind(uword stringptr, ubyte character) -> ubyte {
; Locates the first position of the given character in the string, starting from the right.
; returns Carry set if found + index in A, or Carry clear if not found (and A will be 255, an invalid index).
; NOTE: because this isn't an asmsub, there's only a SINGLE return value here. On the c64/cx16 targets etc there are 2 return values.
ubyte ix
for ix in string.length(stringptr)-1 downto 0 {
if stringptr[ix]==character {
sys.set_carry()
return ix
}
}
sys.clear_carry()
return 255
}
sub contains(str st, ubyte character) -> bool {
void find(st, character)
if_cs
return true
return false
}
sub copy(str source, str target) -> ubyte {
; Copy a string to another, overwriting that one.
; Returns the length of the string that was copied.
; Often you dont have to call this explicitly and can just write string1 = string2
; but this function is useful if youre dealing with addresses for instance.
%ir {{
loadm.w r65534,string.copy.source
loadm.w r65535,string.copy.target
syscall 39 (r65534.w, r65535.w): r0.b
returnr.b r0
}}
}
sub append(str target, str suffix) -> ubyte {
; Append the suffix string to the target. (make sure the buffer is large enough!)
; Returns the length of the resulting string.
cx16.r0L = length(target)
return copy(suffix, target+cx16.r0L) + cx16.r0L
}
sub compare(str st1, str st2) -> byte {
; Compares two strings for sorting.
; Returns -1 (255), 0 or 1, meaning: string1 sorts before, equal or after string2.
; Note that you can also directly compare strings and string values with eachother using
; comparison operators ==, < etcetera (this will use strcmp automatically).
%ir {{
loadm.w r65534,string.compare.st1
loadm.w r65535,string.compare.st2
syscall 16 (r65534.w, r65535.w) : r0.b
returnr.b r0
}}
}
sub lower(str st) -> ubyte {
; Lowercases the petscii string in-place. Returns length of the string.
; (for efficiency, non-letter characters > 128 will also not be left intact,
; but regular text doesn't usually contain those characters anyway.)
ubyte ix
repeat {
ubyte char=st[ix]
if char==0
return ix
if char >= 'A' and char <= 'Z'
st[ix] = char | %00100000
ix++
}
}
sub upper(str st) -> ubyte {
; Uppercases the petscii string in-place. Returns length of the string.
ubyte ix
repeat {
ubyte char=st[ix]
if char==0
return ix
if char >= 97 and char <= 122
st[ix] = char & %11011111
ix++
}
}
sub lowerchar(ubyte char) -> ubyte {
if char >= 'A' and char <= 'Z'
char |= %00100000
return char
}
sub upperchar(ubyte char) -> ubyte {
if char >= 'a' and char <= 'z'
char &= %11011111
return char
}
sub hash(str st) -> ubyte {
; experimental 8 bit hashing function.
; hash(-1)=179; hash(i) = ROL hash(i-1) XOR string[i]
; (experimental because the quality of the resulting hash value still has to be determined)
ubyte hashcode = 179
ubyte ix
sys.clear_carry()
repeat {
if st[ix]!=0 {
rol(hashcode)
hashcode ^= st[ix]
ix++
} else
return hashcode
}
}
sub isdigit(ubyte character) -> bool {
return character>='0' and character<='9'
}
sub isupper(ubyte character) -> bool {
return character>='A' and character<='Z'
}
sub islower(ubyte character) -> bool {
return character>='a' and character<='z'
}
sub isletter(ubyte character) -> bool {
return islower(character) or isupper(character)
}
sub isspace(ubyte character) -> bool {
return character in [32, 13, 9, 10, 141, 160]
}
sub isprint(ubyte character) -> bool {
return character>=32 and character<=127 or character>=160
}
}