prog8/compiler/res/prog8lib/string.p8
Irmen de Jong f2d27403c5 add string.endswith() to efficiently test for a suffix without copying
add string.startswith() to efficiently test for string prefix without copying
2022-07-21 00:38:30 +02:00

317 lines
9.4 KiB
Lua
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

; 0-terminated string manipulation routines.
string {
asmsub length(uword string @AY) clobbers(A) -> ubyte @Y {
; Returns the number of bytes in the string.
; This value is determined during runtime and counts upto the first terminating 0 byte in the string,
; regardless of the size of the string during compilation time. Dont confuse this with len and sizeof!
%asm {{
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
ldy #0
- lda (P8ZP_SCRATCH_W1),y
beq +
iny
bne -
+ rts
}}
}
asmsub left(uword source @R0, ubyte length @A, uword target @R1) clobbers(A, Y) {
; Copies the left side of the source string of the given length to target string.
; It is assumed the target string buffer is large enough to contain the result.
; Also, you have to make sure yourself that length is smaller or equal to the length of the source string.
; Modifies in-place, doesnt return a value (so cant be used in an expression).
%asm {{
; need to copy the the cx16 virtual registers to zeropage to be compatible with C64...
ldy cx16.r0
sty P8ZP_SCRATCH_W1
ldy cx16.r0+1
sty P8ZP_SCRATCH_W1+1
ldy cx16.r1
sty P8ZP_SCRATCH_W2
ldy cx16.r1+1
sty P8ZP_SCRATCH_W2+1
tay
lda #0
sta (P8ZP_SCRATCH_W2),y
cpy #0
bne _loop
rts
_loop dey
lda (P8ZP_SCRATCH_W1),y
sta (P8ZP_SCRATCH_W2),y
cpy #0
bne _loop
+ rts
}}
; asmgen.out(" jsr prog8_lib.func_leftstr")
}
asmsub right(uword source @R0, ubyte length @A, uword target @R1) clobbers(A,Y) {
; Copies the right side of the source string of the given length to target string.
; It is assumed the target string buffer is large enough to contain the result.
; Also, you have to make sure yourself that length is smaller or equal to the length of the source string.
; Modifies in-place, doesnt return a value (so cant be used in an expression).
%asm {{
; need to copy the the cx16 virtual registers to zeropage to be compatible with C64...
sta P8ZP_SCRATCH_B1
lda cx16.r0
ldy cx16.r0+1
jsr string.length
tya
sec
sbc P8ZP_SCRATCH_B1
clc
adc cx16.r0
sta P8ZP_SCRATCH_W1
lda cx16.r0+1
adc #0
sta P8ZP_SCRATCH_W1+1
ldy cx16.r1
sty P8ZP_SCRATCH_W2
ldy cx16.r1+1
sty P8ZP_SCRATCH_W2+1
ldy P8ZP_SCRATCH_B1
lda #0
sta (P8ZP_SCRATCH_W2),y
cpy #0
bne _loop
rts
_loop dey
lda (P8ZP_SCRATCH_W1),y
sta (P8ZP_SCRATCH_W2),y
cpy #0
bne _loop
+ rts
}}
}
asmsub slice(uword source @R0, ubyte start @A, ubyte length @Y, uword target @R1) clobbers(A, Y) {
; Copies a segment from the source string, starting at the given index,
; and of the given length to target string.
; It is assumed the target string buffer is large enough to contain the result.
; Also, you have to make sure yourself that start and length are within bounds of the strings.
; Modifies in-place, doesnt return a value (so cant be used in an expression).
%asm {{
; need to copy the the cx16 virtual registers to zeropage to be compatible with C64...
; substr(source, target, start, length)
sta P8ZP_SCRATCH_B1
lda cx16.r0
sta P8ZP_SCRATCH_W1
lda cx16.r0+1
sta P8ZP_SCRATCH_W1+1
lda cx16.r1
sta P8ZP_SCRATCH_W2
lda cx16.r1+1
sta P8ZP_SCRATCH_W2+1
; adjust src location
clc
lda P8ZP_SCRATCH_W1
adc P8ZP_SCRATCH_B1
sta P8ZP_SCRATCH_W1
bcc +
inc P8ZP_SCRATCH_W1+1
+ lda #0
sta (P8ZP_SCRATCH_W2),y
beq _startloop
- lda (P8ZP_SCRATCH_W1),y
sta (P8ZP_SCRATCH_W2),y
_startloop dey
cpy #$ff
bne -
rts
}}
}
asmsub find(uword string @R0, ubyte character @A) -> ubyte @A, ubyte @Pc {
; Locates the first position of the given character in the string,
; returns Carry set if found + index in A, or Carry clear if not found.
%asm {{
; need to copy the the cx16 virtual registers to zeropage to make this run on C64...
sta P8ZP_SCRATCH_B1
lda cx16.r0
ldy cx16.r0+1
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
ldy #0
- lda (P8ZP_SCRATCH_W1),y
beq _notfound
cmp P8ZP_SCRATCH_B1
beq _found
iny
bne -
_notfound clc
rts
_found tya
sec
rts
}}
}
asmsub copy(uword source @R0, uword target @AY) clobbers(A) -> ubyte @Y {
; Copy a string to another, overwriting that one.
; Returns the length of the string that was copied.
; Often you dont have to call this explicitly and can just write string1 = string2
; but this function is useful if youre dealing with addresses for instance.
%asm {{
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
lda cx16.r0
ldy cx16.r0+1
jmp prog8_lib.strcpy
}}
}
asmsub compare(uword string1 @R0, uword string2 @AY) clobbers(Y) -> byte @A {
; Compares two strings for sorting.
; Returns -1 (255), 0 or 1 depending on wether string1 sorts before, equal or after string2.
; Note that you can also directly compare strings and string values with eachother using
; comparison operators ==, < etcetera (it will use strcmp for you under water automatically).
%asm {{
sta P8ZP_SCRATCH_W2
sty P8ZP_SCRATCH_W2+1
lda cx16.r0
ldy cx16.r0+1
jmp prog8_lib.strcmp_mem
}}
}
asmsub lower(uword st @AY) -> ubyte @Y {
; Lowercases the petscii string in-place. Returns length of the string.
; (for efficiency, non-letter characters > 128 will also not be left intact,
; but regular text doesn't usually contain those characters anyway.)
%asm {{
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
ldy #0
- lda (P8ZP_SCRATCH_W1),y
beq _done
and #$7f
cmp #97
bcc +
cmp #123
bcs +
and #%11011111
+ sta (P8ZP_SCRATCH_W1),y
iny
bne -
_done rts
}}
}
asmsub upper(uword st @AY) -> ubyte @Y {
; Uppercases the petscii string in-place. Returns length of the string.
%asm {{
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
ldy #0
- lda (P8ZP_SCRATCH_W1),y
beq _done
cmp #65
bcc +
cmp #91
bcs +
ora #%00100000
+ sta (P8ZP_SCRATCH_W1),y
iny
bne -
_done rts
}}
}
sub startswith(str st, str prefix) -> bool {
ubyte prefix_len = length(prefix)
ubyte str_len = length(st)
if prefix_len > str_len
return false
cx16.r9L = st[prefix_len]
st[prefix_len] = 0
cx16.r9H = compare(st, prefix) as ubyte
st[prefix_len] = cx16.r9L
return cx16.r9H==0
}
sub endswith(str st, str suffix) -> bool {
ubyte suffix_len = length(suffix)
ubyte str_len = length(st)
if suffix_len > str_len
return false
return compare(st + str_len - suffix_len, suffix) == 0
}
asmsub pattern_match(str string @AY, str pattern @R0) clobbers(Y) -> ubyte @A {
%asm {{
; pattern matching of a string.
; Input: cx16.r0: A NUL-terminated, <255-length pattern
; AY: A NUL-terminated, <255-length string
;
; Output: A = 1 if the string matches the pattern, A = 0 if not.
;
; Notes: Clobbers A, X, Y. Each * in the pattern uses 4 bytes of stack.
;
; see http://6502.org/source/strings/patmatch.htm
str = P8ZP_SCRATCH_W1
stx P8ZP_SCRATCH_REG
sta str
sty str+1
lda cx16.r0
sta modify_pattern1+1
sta modify_pattern2+1
lda cx16.r0+1
sta modify_pattern1+2
sta modify_pattern2+2
jsr _match
lda #0
adc #0
ldx P8ZP_SCRATCH_REG
rts
_match
ldx #$00 ; x is an index in the pattern
ldy #$ff ; y is an index in the string
modify_pattern1
next lda $ffff,x ; look at next pattern character MODIFIED
cmp #'*' ; is it a star?
beq star ; yes, do the complicated stuff
iny ; no, let's look at the string
cmp #'?' ; is the pattern caracter a ques?
bne reg ; no, it's a regular character
lda (str),y ; yes, so it will match anything
beq fail ; except the end of string
reg cmp (str),y ; are both characters the same?
bne fail ; no, so no match
inx ; yes, keep checking
cmp #0 ; are we at end of string?
bne next ; not yet, loop
found rts ; success, return with c=1
star inx ; skip star in pattern
modify_pattern2
cmp $ffff,x ; string of stars equals one star MODIFIED
beq star ; so skip them also
stloop txa ; we first try to match with * = ""
pha ; and grow it by 1 character every
tya ; time we loop
pha ; save x and y on stack
jsr next ; recursive call
pla ; restore x and y
tay
pla
tax
bcs found ; we found a match, return with c=1
iny ; no match yet, try to grow * string
lda (str),y ; are we at the end of string?
bne stloop ; not yet, add a character
fail clc ; yes, no match found, return with c=0
rts
}}
}
}