; 0-terminated string manipulation routines.
;
; Most routines are written in inline assembly. Routines that receive a pointer in one of
; the cx16 virtual registers (R0, R1) first copy it into the P8ZP_SCRATCH_W1/W2 zeropage words,
; because (zp),y addressing needs a zeropage pointer and the virtual registers are not
; guaranteed to live in zeropage on every target (for example the C64).

string {
    %option no_symbol_prefixing

    asmsub length(uword string @AY) clobbers(A) -> ubyte @Y {
        ; Returns the number of bytes in the string.
        ; This value is determined during runtime and counts up to the first terminating 0 byte in the string,
        ; regardless of the size of the string during compilation time. Don't confuse this with len and sizeof!
        ; The index is 8 bits wide: if no 0 byte is found within 256 bytes, Y wraps around and 0 is returned.
        %asm {{
            sta  P8ZP_SCRATCH_W1
            sty  P8ZP_SCRATCH_W1+1
            ldy  #0
-           lda  (P8ZP_SCRATCH_W1),y
            beq  +
            iny
            bne  -
+           rts
        }}
    }

    asmsub left(uword source @R0, ubyte length @A, uword target @R1) clobbers(A, Y) {
        ; Copies the left side of the source string of the given length to target string.
        ; It is assumed the target string buffer is large enough to contain the result.
        ; Also, you have to make sure yourself that length is smaller or equal to the length of the source string.
        ; Modifies in-place, doesn't return a value (so can't be used in an expression).
        %asm {{
            ; need to copy the cx16 virtual registers to zeropage to be compatible with C64...
            ldy  cx16.r0
            sty  P8ZP_SCRATCH_W1
            ldy  cx16.r0+1
            sty  P8ZP_SCRATCH_W1+1
            ldy  cx16.r1
            sty  P8ZP_SCRATCH_W2
            ldy  cx16.r1+1
            sty  P8ZP_SCRATCH_W2+1
            tay                             ; Y = length
            lda  #0
            sta  (P8ZP_SCRATCH_W2),y        ; write the 0 terminator at target[length]
            cpy  #0
            bne  _loop
            rts                             ; length 0: result is the empty string
_loop       dey                             ; copy backwards from index length-1 down to 0
            lda  (P8ZP_SCRATCH_W1),y
            sta  (P8ZP_SCRATCH_W2),y
            cpy  #0
            bne  _loop
            rts
        }}
    }

    asmsub right(uword source @R0, ubyte length @A, uword target @R1) clobbers(A,Y) {
        ; Copies the right side of the source string of the given length to target string.
        ; It is assumed the target string buffer is large enough to contain the result.
        ; Also, you have to make sure yourself that length is smaller or equal to the length of the source string.
        ; Modifies in-place, doesn't return a value (so can't be used in an expression).
        %asm {{
            ; need to copy the cx16 virtual registers to zeropage to be compatible with C64...
            sta  P8ZP_SCRATCH_B1            ; remember the requested length
            lda  cx16.r0
            ldy  cx16.r0+1
            jsr  string.length              ; Y = length of the source string
            tya
            sec
            sbc  P8ZP_SCRATCH_B1            ; A = offset of the first character to copy
            clc
            adc  cx16.r0
            sta  P8ZP_SCRATCH_W1            ; W1 = source + offset
            lda  cx16.r0+1
            adc  #0
            sta  P8ZP_SCRATCH_W1+1
            ldy  cx16.r1
            sty  P8ZP_SCRATCH_W2
            ldy  cx16.r1+1
            sty  P8ZP_SCRATCH_W2+1
            ldy  P8ZP_SCRATCH_B1
            lda  #0
            sta  (P8ZP_SCRATCH_W2),y        ; write the 0 terminator at target[length]
            cpy  #0
            bne  _loop
            rts                             ; length 0: result is the empty string
_loop       dey                             ; copy backwards from index length-1 down to 0
            lda  (P8ZP_SCRATCH_W1),y
            sta  (P8ZP_SCRATCH_W2),y
            cpy  #0
            bne  _loop
            rts
        }}
    }

    asmsub slice(uword source @R0, ubyte start @A, ubyte length @Y, uword target @R1) clobbers(A, Y) {
        ; Copies a segment from the source string, starting at the given index,
        ; and of the given length to target string.
        ; It is assumed the target string buffer is large enough to contain the result.
        ; Also, you have to make sure yourself that start and length are within bounds of the strings.
        ; Modifies in-place, doesn't return a value (so can't be used in an expression).
        %asm {{
            ; need to copy the cx16 virtual registers to zeropage to be compatible with C64...
            sta  P8ZP_SCRATCH_B1            ; remember the start index
            lda  cx16.r0
            sta  P8ZP_SCRATCH_W1
            lda  cx16.r0+1
            sta  P8ZP_SCRATCH_W1+1
            lda  cx16.r1
            sta  P8ZP_SCRATCH_W2
            lda  cx16.r1+1
            sta  P8ZP_SCRATCH_W2+1

            ; adjust src location: W1 = source + start
            clc
            lda  P8ZP_SCRATCH_W1
            adc  P8ZP_SCRATCH_B1
            sta  P8ZP_SCRATCH_W1
            bcc  +
            inc  P8ZP_SCRATCH_W1+1
+           lda  #0
            sta  (P8ZP_SCRATCH_W2),y        ; write the 0 terminator at target[length]
            beq  _startloop                 ; always taken: Z is still set from the lda #0
-           lda  (P8ZP_SCRATCH_W1),y
            sta  (P8ZP_SCRATCH_W2),y
_startloop  dey                             ; copy backwards; stop once Y wraps below 0
            cpy  #$ff
            bne  -
            rts
        }}
    }

    asmsub find(uword string @R0, ubyte character @A) -> ubyte @A, bool @Pc {
        ; Locates the first position of the given character in the string,
        ; returns Carry set if found + index in A, or A=0 + Carry clear if not found.
        ; NOTE(review): the routine also uses Y as its index register although no clobbers() is declared.
        %asm {{
            ; need to copy the cx16 virtual registers to zeropage to make this run on C64...
            sta  P8ZP_SCRATCH_B1            ; the character to look for
            lda  cx16.r0
            ldy  cx16.r0+1
            sta  P8ZP_SCRATCH_W1
            sty  P8ZP_SCRATCH_W1+1
            ldy  #0
-           lda  (P8ZP_SCRATCH_W1),y
            beq  _notfound                  ; reached the 0 terminator
            cmp  P8ZP_SCRATCH_B1
            beq  _found
            iny
            bne  -
_notfound   lda  #0
            clc
            rts
_found      tya
            sec
            rts
        }}
    }

    asmsub copy(uword source @R0, uword target @AY) clobbers(A) -> ubyte @Y {
        ; Copy a string to another, overwriting that one.
        ; Returns the length of the string that was copied.
        ; Often you don't have to call this explicitly and can just write string1 = string2
        ; but this function is useful if you're dealing with addresses for instance.
        %asm {{
            sta  P8ZP_SCRATCH_W1            ; target address goes in W1
            sty  P8ZP_SCRATCH_W1+1
            lda  cx16.r0                    ; source address goes in AY
            ldy  cx16.r0+1
            jmp  prog8_lib.strcpy           ; tail call: its rts returns to our caller
        }}
    }

    asmsub append(uword target @R0, uword suffix @R1) clobbers(Y) -> ubyte @A {
        ; Append the suffix string to the target. (make sure the buffer is large enough!)
        ; Returns the length of the resulting string.
        %asm {{
            lda  cx16.r0
            ldy  cx16.r0+1
            jsr  length                     ; Y = current length of target
            sty  P8ZP_SCRATCH_B1
            tya
            clc
            adc  cx16.r0
            sta  P8ZP_SCRATCH_W1            ; W1 = address of target's 0 terminator
            lda  cx16.r0+1
            adc  #0
            sta  P8ZP_SCRATCH_W1+1
            lda  cx16.r1
            ldy  cx16.r1+1
            jsr  prog8_lib.strcpy           ; copy suffix (AY) over the terminator (W1)
            tya                             ; length of the suffix (as returned in Y)...
            clc
            adc  P8ZP_SCRATCH_B1            ; ...plus the original length of target
            rts
        }}
    }

    asmsub compare(uword string1 @R0, uword string2 @AY) clobbers(Y) -> byte @A {
        ; Compares two strings for sorting.
        ; Returns -1 (255), 0 or 1 depending on whether string1 sorts before, equal or after string2.
        ; Note that you can also directly compare strings and string values with each other using
        ; comparison operators ==, < etcetera (it will use strcmp for you under water automatically).
        %asm {{
            sta  P8ZP_SCRATCH_W2            ; string2 goes in W2
            sty  P8ZP_SCRATCH_W2+1
            lda  cx16.r0                    ; string1 goes in AY
            ldy  cx16.r0+1
            jmp  prog8_lib.strcmp_mem       ; tail call: its rts returns to our caller
        }}
    }

    asmsub lower(uword st @AY) -> ubyte @Y {
        ; Lowercases the petscii string in-place. Returns length of the string.
        ; (for efficiency, bit 7 is stripped from every character, so non-letter characters
        ;  of 128 and above are not left intact either,
        ;  but regular text doesn't usually contain those characters anyway.)
        %asm {{
            sta  P8ZP_SCRATCH_W1
            sty  P8ZP_SCRATCH_W1+1
            ldy  #0
-           lda  (P8ZP_SCRATCH_W1),y
            beq  _done
            and  #$7f                       ; folds the 193..218 range onto 65..90
            cmp  #97
            bcc  +
            cmp  #123
            bcs  +
            and  #%11011111                 ; 97..122 -> 65..90
+           sta  (P8ZP_SCRATCH_W1),y
            iny
            bne  -
_done       rts
        }}
    }

    asmsub upper(uword st @AY) -> ubyte @Y {
        ; Uppercases the petscii string in-place. Returns length of the string.
        %asm {{
            sta  P8ZP_SCRATCH_W1
            sty  P8ZP_SCRATCH_W1+1
            ldy  #0
-           lda  (P8ZP_SCRATCH_W1),y
            beq  _done
            cmp  #65
            bcc  +
            cmp  #91
            bcs  +
            ora  #%00100000                 ; 65..90 -> 97..122
+           sta  (P8ZP_SCRATCH_W1),y
            iny
            bne  -
_done       rts
        }}
    }

    asmsub lowerchar(ubyte character @A) -> ubyte @A {
        ; Lowercases a single character, using the same conversion as lower():
        ; bit 7 is stripped first, then 97..122 is mapped to 65..90.
        %asm {{
            and  #$7f
            cmp  #97
            bcc  +
            cmp  #123
            bcs  +
            and  #%11011111
+           rts
        }}
    }

    asmsub upperchar(ubyte character @A) -> ubyte @A {
        ; Uppercases a single character, using the same conversion as upper():
        ; 65..90 is mapped to 97..122, every other value is returned unchanged.
        %asm {{
            cmp  #65
            bcc  +
            cmp  #91
            bcs  +
            ora  #%00100000
+           rts
        }}
    }

    sub startswith(str st, str prefix) -> bool {
        ; Returns true if st begins with prefix.
        ; Works by temporarily writing a 0 terminator into st at the prefix length, comparing,
        ; and then restoring the original byte. So st must be writable memory.
        ; Uses cx16.r9L and cx16.r9H as temporary storage.
        ubyte prefix_len = length(prefix)
        ubyte str_len = length(st)
        if prefix_len > str_len
            return false
        cx16.r9L = st[prefix_len]
        st[prefix_len] = 0
        cx16.r9H = compare(st, prefix) as ubyte
        st[prefix_len] = cx16.r9L
        return cx16.r9H==0
    }

    sub endswith(str st, str suffix) -> bool {
        ; Returns true if st ends with suffix.
        ; Compares suffix against the tail of st that has the same length as suffix.
        ubyte suffix_len = length(suffix)
        ubyte str_len = length(st)
        if suffix_len > str_len
            return false
        return compare(st + str_len - suffix_len, suffix) == 0
    }

    asmsub pattern_match(str string @AY, str pattern @R0) clobbers(Y) -> ubyte @A {
        %asm {{
; pattern matching of a string.
; Input:  cx16.r0:  A NUL-terminated, <255-length pattern
;              AY:  A NUL-terminated, <255-length string
;
; Output: A = 1 if the string matches the pattern, A = 0 if not.
;
; Notes:  Clobbers A, X, Y. Each * in the pattern uses 4 bytes of stack.
;         The pattern address is patched into the two instructions labeled
;         modify_pattern1 and modify_pattern2 (self-modifying code), so this
;         routine has to run from writable memory.
;         NOTE(review): X is used as the pattern index, but the signature only
;         declares clobbers(Y) - confirm whether X should be declared as well.
;
; see http://6502.org/source/strings/patmatch.htm

strptr = P8ZP_SCRATCH_W1

            sta  strptr
            sty  strptr+1
            lda  cx16.r0
            sta  modify_pattern1+1
            sta  modify_pattern2+1
            lda  cx16.r0+1
            sta  modify_pattern1+2
            sta  modify_pattern2+2
            jsr  _match
            lda  #0
            rol  a                  ; turn the carry result of _match into 0 or 1
            rts

_match
            ldx  #$00               ; x is an index in the pattern
            ldy  #$ff               ; y is an index in the string
modify_pattern1
next        lda  $ffff,x            ; look at next pattern character    MODIFIED
            cmp  #'*'               ; is it a star?
            beq  star               ; yes, do the complicated stuff
            iny                     ; no, let's look at the string
            cmp  #'?'               ; is the pattern character a ques?
            bne  reg                ; no, it's a regular character
            lda  (strptr),y         ; yes, so it will match anything
            beq  fail               ;  except the end of string
reg         cmp  (strptr),y         ; are both characters the same?
            bne  fail               ; no, so no match
            inx                     ; yes, keep checking
            cmp  #0                 ; are we at end of string?
            bne  next               ; not yet, loop
found       rts                     ; success, return with c=1

star        inx                     ; skip star in pattern
modify_pattern2
            cmp  $ffff,x            ; string of stars equals one star   MODIFIED
            beq  star               ;  so skip them also
stloop      txa                     ; we first try to match with * = ""
            pha                     ;  and grow it by 1 character every
            tya                     ;  time we loop
            pha                     ; save x and y on stack
            jsr  next               ; recursive call
            pla                     ; restore x and y
            tay
            pla
            tax
            bcs  found              ; we found a match, return with c=1
            iny                     ; no match yet, try to grow * string
            lda  (strptr),y         ; are we at the end of string?
            bne  stloop             ; not yet, add a character
fail        clc                     ; yes, no match found, return with c=0
            rts
        }}
    }

    asmsub hash(str string @R0) -> ubyte @A {
        ; experimental 8 bit hashing function.
        ; hash(-1)=179;  clear carry;  hash(i) = ROL hash(i-1)  XOR  string[i]
        ; (the carry produced by each ROL is carried over into the next iteration)
        ; On the English word list in /usr/share/dict/words it seems to have a pretty even distribution
        ; The empty string hashes to 179.
        %asm {{
            ; need to copy the cx16 virtual register to zeropage to be compatible with C64:
            ; (zp),y addressing only works with a pointer that is located in zeropage.
            lda  cx16.r0
            sta  P8ZP_SCRATCH_W1
            lda  cx16.r0+1
            sta  P8ZP_SCRATCH_W1+1
            lda  #179
            sta  P8ZP_SCRATCH_REG           ; running hash value
            ldy  #0
            clc
-           lda  (P8ZP_SCRATCH_W1),y
            beq  +
            rol  P8ZP_SCRATCH_REG
            eor  P8ZP_SCRATCH_REG
            sta  P8ZP_SCRATCH_REG
            iny
            bne  -
+           lda  P8ZP_SCRATCH_REG
            rts
        }}
    }

    asmsub isdigit(ubyte character @A) -> bool @Pc {
        ; Returns Carry set if the character is in the range '0'..'9', Carry clear otherwise.
        %asm {{
            cmp  #'0'
            bcs  +
            rts                             ; below '0': carry is already clear
+           cmp  #'9'+1
            bcc  +
            clc                             ; above '9'
            rts
+           sec
            rts
        }}
    }

    asmsub isupper(ubyte character @A) -> bool @Pc {
        ; shifted petscii has 2 ranges that contain the upper case letters...
        ; Returns Carry set if the character is in 97..122 or in 193..218, Carry clear otherwise.
        %asm {{
            cmp  #97
            bcs  +
            rts                             ; below 97: carry is already clear
+           cmp  #122+1
            bcc  _yes
            cmp  #193
            bcs  +
            rts                             ; between the two ranges: carry is already clear
+           cmp  #218+1
            bcc  _yes
            clc
            rts
_yes        sec
            rts
        }}
    }

    asmsub islower(ubyte character @A) -> bool @Pc {
        ; Returns Carry set if the character is in the range 'a'..'z', Carry clear otherwise.
        %asm {{
            cmp  #'a'
            bcs  +
            rts                             ; below 'a': carry is already clear
+           cmp  #'z'+1
            bcc  +
            clc
            rts
+           sec
            rts
        }}
    }

    asmsub isletter(ubyte character @A) -> bool @Pc {
        ; Returns Carry set if the character is a letter according to islower or isupper.
        %asm {{
            jsr  islower
            bcs  +
            jmp  isupper                    ; tail call: isupper's carry is the result
+           rts
        }}
    }
}