From 82165c1a7767043aa9cca856aaa57e7686a77975 Mon Sep 17 00:00:00 2001 From: Colin Leroy-Mira Date: Mon, 18 Mar 2024 18:40:45 +0100 Subject: [PATCH 1/6] Implement strcasestr --- include/string.h | 1 + libsrc/common/strcasestr.c | 36 +++++++++++++++++++++++++++++++++ test/val/strstr-test.c | 41 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+) create mode 100644 libsrc/common/strcasestr.c create mode 100644 test/val/strstr-test.c diff --git a/include/string.h b/include/string.h index abaf80e7d..b19f44e31 100644 --- a/include/string.h +++ b/include/string.h @@ -81,6 +81,7 @@ void __fastcall__ bzero (void* ptr, size_t n); /* BSD */ char* __fastcall__ strdup (const char* s); /* SYSV/BSD */ int __fastcall__ stricmp (const char* s1, const char* s2); /* DOS/Windows */ int __fastcall__ strcasecmp (const char* s1, const char* s2); /* Same for Unix */ +char* __fastcall__ strcasestr (const char* str, const char* substr); int __fastcall__ strnicmp (const char* s1, const char* s2, size_t count); /* DOS/Windows */ int __fastcall__ strncasecmp (const char* s1, const char* s2, size_t count); /* Same for Unix */ size_t __fastcall__ strnlen (const char* s, size_t maxlen); /* POSIX.1-2008 */ diff --git a/libsrc/common/strcasestr.c b/libsrc/common/strcasestr.c new file mode 100644 index 000000000..693b43a37 --- /dev/null +++ b/libsrc/common/strcasestr.c @@ -0,0 +1,36 @@ +/* +** strcasestr.c +** +** Colin Leroy-Mira, 2024 +*/ + + + +#include +#include +#include + + + +/*****************************************************************************/ +/* Code */ +/*****************************************************************************/ + + + +char* __fastcall__ strcasestr(const char *str, const char *substr) { + size_t len_a = strlen(str); + size_t len_b = strlen(substr); + const char *end_str; + + if (len_a < len_b) + return NULL; + + len_a -= len_b; + + for (end_str = str + len_a + 1; str < end_str; str++) { + if (!strncasecmp(str, substr, len_b)) + return (char *)str; + } + return NULL; +} diff --git a/test/val/strstr-test.c b/test/val/strstr-test.c new file mode 100644 index 000000000..5c8a147b0 --- /dev/null +++ b/test/val/strstr-test.c @@ -0,0 +1,41 @@ +#include +#include +#include + +int fails = 0; + +#define STRSTR_TEST(needle,expected) \ + if (strstr(haystack, (needle)) != (expected)) { \ + printf("strstr failure: expected %p for \"%s\", " \ + "got %p\n", \ + expected, needle, strstr(haystack, (needle)));\ + fails++; \ + } + +#define STRCASESTR_TEST(needle,expected) \ + if (strcasestr(haystack, (needle)) != (expected)) { \ + printf("strcasestr failure: expected %p for \"%s\", " \ + "got %p\n", \ + expected, needle, strcasestr(haystack, (needle)));\ + fails++; \ + } + +int main (void) +{ + const char *haystack = "This is a string to search in"; + + STRSTR_TEST("This is", haystack + 0); + STRSTR_TEST("a string", haystack + 8); + STRSTR_TEST("This is a string to search in", haystack); + STRSTR_TEST("search in", haystack + 20); + STRSTR_TEST("This is a string to search in with extra chars", NULL); + STRSTR_TEST("nowhere", NULL); + + STRCASESTR_TEST("this is", haystack + 0); + STRCASESTR_TEST("a STRING", haystack + 8); + STRCASESTR_TEST("this is a string TO search in", haystack); + STRCASESTR_TEST("This is a string to search in with extra chars", NULL); + STRCASESTR_TEST("search IN", haystack + 20); + + return fails; +} From b5d259bafb1b3bcf4eb2c1ddc92d14b3c891fd49 Mon Sep 17 00:00:00 2001 From: Colin Leroy-Mira Date: Mon, 18 Mar 2024 19:52:04 +0100 Subject: [PATCH 2/6] Implement strcasestr as part of strstr --- libsrc/common/strcasestr.c | 36 -------------------------- libsrc/common/strstr.s | 53 +++++++++++++++++++++++++------------- libsrc/common/tolower.s | 6 ++--- 3 files changed, 38 insertions(+), 57 deletions(-) delete mode 100644 libsrc/common/strcasestr.c diff --git a/libsrc/common/strcasestr.c b/libsrc/common/strcasestr.c deleted file mode 100644 index 693b43a37..000000000 --- a/libsrc/common/strcasestr.c +++ /dev/null @@ -1,36 +0,0 @@ -/* -** strcasestr.c -** -** Colin Leroy-Mira, 2024 -*/ - - - -#include -#include -#include - - - -/*****************************************************************************/ -/* Code */ -/*****************************************************************************/ - - - -char* __fastcall__ strcasestr(const char *str, const char *substr) { - size_t len_a = strlen(str); - size_t len_b = strlen(substr); - const char *end_str; - - if (len_a < len_b) - return NULL; - - len_a -= len_b; - - for (end_str = str + len_a + 1; str < end_str; str++) { - if (!strncasecmp(str, substr, len_b)) - return (char *)str; - } - return NULL; -} diff --git a/libsrc/common/strstr.s b/libsrc/common/strstr.s index 84f633245..9cc9c0d33 100644 --- a/libsrc/common/strstr.s +++ b/libsrc/common/strstr.s @@ -4,11 +4,18 @@ ; char* strstr (const char* haystack, const char* needle); ; - .export _strstr - .import popptr1 - .importzp ptr1, ptr2, ptr3, ptr4, tmp1 + .export _strstr, _strcasestr + .import popptr1, _tolower + .importzp ptr1, ptr2, ptr3, ptr4, tmp1, tmp2, tmp3 _strstr: + ldy #$01 + bne :+ +_strcasestr: + ldy #$00 +: + sty tmp2 ; Set case sensitivity + sta ptr2 ; Save needle stx ptr2+1 sta ptr4 ; Setup temp copy for later @@ -24,10 +31,19 @@ _strstr: ; Search for the beginning of the string (this is not an optimal search ; strategy [in fact, it's pretty dumb], but it's simple to implement). - sta tmp1 ; Save start of needle + ldx tmp2 ; Lowercase if needed + bne :+ + jsr _tolower + +: sta tmp1 ; Save start of needle @L1: lda (ptr1),y ; Get next char from haystack beq @NotFound ; Jump if end - cmp tmp1 ; Start of needle found? + + ldx tmp2 ; Lowercase if needed + bne :+ + jsr _tolower + +: cmp tmp1 ; Start of needle found? beq @L2 ; Jump if so iny ; Next char bne @L1 @@ -43,7 +59,7 @@ _strstr: bcc @L3 inc ptr1+1 -; ptr1 points to the start of needle now. Setup temporary pointers for the +; ptr1 points to the start of needle in haystack now. Setup temporary pointers for the ; search. The low byte of ptr4 is already set. @L3: sta ptr3 @@ -57,7 +73,19 @@ _strstr: @L4: lda (ptr4),y ; Get char from needle beq @Found ; Jump if end of needle (-> found) - cmp (ptr3),y ; Compare with haystack + + ldx tmp2 ; Lowercase if needed + bne :+ + jsr _tolower +: sta tmp3 + + lda (ptr3),y ; Compare with haystack + + ldx tmp2 ; Lowercase if needed + bne :+ + jsr _tolower + +: cmp tmp3 bne @L5 ; Jump if not equal iny ; Next char bne @L4 @@ -82,14 +110,3 @@ _strstr: lda #$00 ; return NULL tax rts - - - - - - - - - - - diff --git a/libsrc/common/tolower.s b/libsrc/common/tolower.s index 828be1cb1..22b030da3 100644 --- a/libsrc/common/tolower.s +++ b/libsrc/common/tolower.s @@ -17,12 +17,12 @@ _tolower: cpx #$00 ; out of range? bne @L2 ; if so, return the argument unchanged - tay ; save char + pha ; save char jsr ctypemaskdirect ; get character classification and #CT_UPPER ; upper case char? beq @L1 ; jump if no - tya ; restore char + pla ; restore char adc #<('a'-'A') ; make lower case char (ctypemaskdirect ensures carry clear) rts -@L1: tya ; restore char +@L1: pla ; restore char @L2: rts From 0c681b42ef96c53c429962a47011673e4f6ed88b Mon Sep 17 00:00:00 2001 From: Colin Leroy-Mira Date: Tue, 19 Mar 2024 18:07:17 +0100 Subject: [PATCH 3/6] Factorize to save 20 bytes --- libsrc/common/strstr.s | 45 ++++++++++++++++------------------------- libsrc/common/tolower.s | 7 ++++--- 2 files changed, 21 insertions(+), 31 deletions(-) diff --git a/libsrc/common/strstr.s b/libsrc/common/strstr.s index 9cc9c0d33..d24f1b0c9 100644 --- a/libsrc/common/strstr.s +++ b/libsrc/common/strstr.s @@ -5,16 +5,18 @@ ; .export _strstr, _strcasestr - .import popptr1, _tolower - .importzp ptr1, ptr2, ptr3, ptr4, tmp1, tmp2, tmp3 + .import popptr1, return0, tolower_a + .importzp ptr1, ptr2, ptr3, ptr4, tmp1, tmp2 + +maybe_lower: ; Lowercase char in A if needed + jmp tolower_a ; patched on entry with either JMP or RTS _strstr: - ldy #$01 + ldy #$60 ; RTS bne :+ _strcasestr: - ldy #$00 -: - sty tmp2 ; Set case sensitivity + ldy #$4C ; JMP absolute +: sty maybe_lower sta ptr2 ; Save needle stx ptr2+1 @@ -31,19 +33,13 @@ _strcasestr: ; Search for the beginning of the string (this is not an optimal search ; strategy [in fact, it's pretty dumb], but it's simple to implement). - ldx tmp2 ; Lowercase if needed - bne :+ - jsr _tolower - -: sta tmp1 ; Save start of needle + jsr maybe_lower ; Lowercase if needed + sta tmp1 ; Save start of needle @L1: lda (ptr1),y ; Get next char from haystack beq @NotFound ; Jump if end - ldx tmp2 ; Lowercase if needed - bne :+ - jsr _tolower - -: cmp tmp1 ; Start of needle found? + jsr maybe_lower ; Lowercase if needed + cmp tmp1 ; Start of needle found? beq @L2 ; Jump if so iny ; Next char bne @L1 @@ -74,18 +70,13 @@ _strcasestr: @L4: lda (ptr4),y ; Get char from needle beq @Found ; Jump if end of needle (-> found) - ldx tmp2 ; Lowercase if needed - bne :+ - jsr _tolower -: sta tmp3 + jsr maybe_lower ; Lowercase if needed + sta tmp2 lda (ptr3),y ; Compare with haystack - ldx tmp2 ; Lowercase if needed - bne :+ - jsr _tolower - -: cmp tmp3 + jsr maybe_lower ; Lowercase if needed + cmp tmp2 bne @L5 ; Jump if not equal iny ; Next char bne @L4 @@ -107,6 +98,4 @@ _strcasestr: ; We reached end of haystack without finding needle @NotFound: - lda #$00 ; return NULL - tax - rts + jmp return0 ; return NULL diff --git a/libsrc/common/tolower.s b/libsrc/common/tolower.s index 22b030da3..4d02e4dfb 100644 --- a/libsrc/common/tolower.s +++ b/libsrc/common/tolower.s @@ -10,13 +10,14 @@ ; int tolower (int c); ; - .export _tolower + .export _tolower, tolower_a .include "ctype.inc" .import ctypemaskdirect _tolower: cpx #$00 ; out of range? - bne @L2 ; if so, return the argument unchanged + bne out ; if so, return the argument unchanged +tolower_a: pha ; save char jsr ctypemaskdirect ; get character classification and #CT_UPPER ; upper case char? @@ -25,4 +26,4 @@ _tolower: adc #<('a'-'A') ; make lower case char (ctypemaskdirect ensures carry clear) rts @L1: pla ; restore char -@L2: rts +out: rts From 71d82ab5d9eb0adf3719955904549ff4ec547a75 Mon Sep 17 00:00:00 2001 From: Colin Leroy-Mira Date: Fri, 22 Mar 2024 17:19:26 +0100 Subject: [PATCH 4/6] Use common naming scheme for tolowerdirect --- libsrc/common/strstr.s | 4 ++-- libsrc/common/tolower.s | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libsrc/common/strstr.s b/libsrc/common/strstr.s index d24f1b0c9..6ab46148c 100644 --- a/libsrc/common/strstr.s +++ b/libsrc/common/strstr.s @@ -5,11 +5,11 @@ ; .export _strstr, _strcasestr - .import popptr1, return0, tolower_a + .import popptr1, return0, tolowerdirect .importzp ptr1, ptr2, ptr3, ptr4, tmp1, tmp2 maybe_lower: ; Lowercase char in A if needed - jmp tolower_a ; patched on entry with either JMP or RTS + jmp tolowerdirect ; patched on entry with either JMP or RTS _strstr: ldy #$60 ; RTS diff --git a/libsrc/common/tolower.s b/libsrc/common/tolower.s index 4d02e4dfb..9c143f1ce 100644 --- a/libsrc/common/tolower.s +++ b/libsrc/common/tolower.s @@ -10,14 +10,14 @@ ; int tolower (int c); ; - .export _tolower, tolower_a + .export _tolower, tolowerdirect .include "ctype.inc" .import ctypemaskdirect _tolower: cpx #$00 ; out of range? bne out ; if so, return the argument unchanged -tolower_a: +tolowerdirect: pha ; save char jsr ctypemaskdirect ; get character classification and #CT_UPPER ; upper case char? From a823d900823056c2109c4de362c6cbcfcc512b7d Mon Sep 17 00:00:00 2001 From: Colin Leroy-Mira Date: Fri, 19 Apr 2024 07:57:47 +0200 Subject: [PATCH 5/6] Separated versions --- libsrc/common/strcasestr.s | 95 ++++++++++++++++++++++++++++++++++++++ libsrc/common/strstr.s | 33 ++++--------- 2 files changed, 103 insertions(+), 25 deletions(-) create mode 100644 libsrc/common/strcasestr.s diff --git a/libsrc/common/strcasestr.s b/libsrc/common/strcasestr.s new file mode 100644 index 000000000..58364f419 --- /dev/null +++ b/libsrc/common/strcasestr.s @@ -0,0 +1,95 @@ +; +; Ullrich von Bassewitz, 11.12.1998 +; +; char* strcasestr (const char* haystack, const char* needle); +; + + .export _strcasestr + .import popptr1, return0, tolowerdirect + .importzp ptr1, ptr2, ptr3, ptr4, tmp1, tmp2, tmp3, tmp4 + .include "ctype.inc" + + .segment "LOWCODE" + +_strcasestr: + sta ptr2 ; Save needle + stx ptr2+1 + sta ptr4 ; Setup temp copy for later + + jsr popptr1 ; Get haystack to ptr1 + +; If needle is empty, return haystack + + ; ldy #$00 Y=0 guaranteed by popptr1 + lda (ptr2),y ; Get first byte of needle + beq @Found ; Needle is empty --> we're done + +; Search for the beginning of the string (this is not an optimal search +; strategy [in fact, it's pretty dumb], but it's simple to implement). + + jsr tolowerdirect ; Lowercase + sta tmp1 ; Save start of needle +@L1: lda (ptr1),y ; Get next char from haystack + beq @NotFound ; Jump if end + + jsr tolowerdirect ; Lowercase + cmp tmp1 ; Start of needle found? + beq @L2 ; Jump if so + iny ; Next char + bne @L1 + inc ptr1+1 ; Bump high byte + bne @L1 ; Branch always + +; We found the start of needle in haystack + +@L2: tya ; Get offset + clc + adc ptr1 + sta ptr1 ; Make ptr1 point to start + bcc @L3 + inc ptr1+1 + +; ptr1 points to the start of needle in haystack now. Setup temporary pointers for the +; search. The low byte of ptr4 is already set. + +@L3: sta ptr3 + lda ptr1+1 + sta ptr3+1 + lda ptr2+1 + sta ptr4+1 + ldy #1 ; First char is identical, so start on second + +; Do the compare + +@L4: lda (ptr4),y ; Get char from needle + beq @Found ; Jump if end of needle (-> found) + + jsr tolowerdirect ; Lowercase + sta tmp2 + + lda (ptr3),y ; Compare with haystack + + jsr tolowerdirect ; Lowercase + cmp tmp2 + bne @L5 ; Jump if not equal + iny ; Next char + bne @L4 + inc ptr3+1 + inc ptr4+1 ; Bump hi byte of pointers + bne @L4 ; Next char (branch always) + +; The strings did not compare equal, search next start of needle + +@L5: ldy #1 ; Start after this char + bne @L1 ; Branch always + +; We found the start of needle + +@Found: lda ptr1 + ldx ptr1+1 + rts + +; We reached end of haystack without finding needle + +@NotFound: + jmp return0 ; return NULL diff --git a/libsrc/common/strstr.s b/libsrc/common/strstr.s index 6ab46148c..691e5ba5c 100644 --- a/libsrc/common/strstr.s +++ b/libsrc/common/strstr.s @@ -4,20 +4,11 @@ ; char* strstr (const char* haystack, const char* needle); ; - .export _strstr, _strcasestr - .import popptr1, return0, tolowerdirect - .importzp ptr1, ptr2, ptr3, ptr4, tmp1, tmp2 - -maybe_lower: ; Lowercase char in A if needed - jmp tolowerdirect ; patched on entry with either JMP or RTS + .export _strstr + .import popptr1 + .importzp ptr1, ptr2, ptr3, ptr4, tmp1 _strstr: - ldy #$60 ; RTS - bne :+ -_strcasestr: - ldy #$4C ; JMP absolute -: sty maybe_lower - sta ptr2 ; Save needle stx ptr2+1 sta ptr4 ; Setup temp copy for later @@ -33,12 +24,9 @@ _strcasestr: ; Search for the beginning of the string (this is not an optimal search ; strategy [in fact, it's pretty dumb], but it's simple to implement). - jsr maybe_lower ; Lowercase if needed sta tmp1 ; Save start of needle @L1: lda (ptr1),y ; Get next char from haystack beq @NotFound ; Jump if end - - jsr maybe_lower ; Lowercase if needed cmp tmp1 ; Start of needle found? beq @L2 ; Jump if so iny ; Next char @@ -55,7 +43,7 @@ _strcasestr: bcc @L3 inc ptr1+1 -; ptr1 points to the start of needle in haystack now. Setup temporary pointers for the +; ptr1 points to the start of needle now. Setup temporary pointers for the ; search. The low byte of ptr4 is already set. @L3: sta ptr3 @@ -69,14 +57,7 @@ _strcasestr: @L4: lda (ptr4),y ; Get char from needle beq @Found ; Jump if end of needle (-> found) - - jsr maybe_lower ; Lowercase if needed - sta tmp2 - - lda (ptr3),y ; Compare with haystack - - jsr maybe_lower ; Lowercase if needed - cmp tmp2 + cmp (ptr3),y ; Compare with haystack bne @L5 ; Jump if not equal iny ; Next char bne @L4 @@ -98,4 +79,6 @@ _strcasestr: ; We reached end of haystack without finding needle @NotFound: - jmp return0 ; return NULL + lda #$00 ; return NULL + tax + rts From 793aa48a4943752e4a24731f652ca0ee1b485b81 Mon Sep 17 00:00:00 2001 From: Colin Leroy-Mira Date: Fri, 19 Apr 2024 08:13:41 +0200 Subject: [PATCH 6/6] Add doc --- doc/funcref.sgml | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/doc/funcref.sgml b/doc/funcref.sgml index 81c63a38b..a0a6d7ca8 100644 --- a/doc/funcref.sgml +++ b/doc/funcref.sgml @@ -780,6 +780,7 @@ communication, see also testcode/lib/ser-test.c. + @@ -7899,22 +7900,47 @@ be used in presence of a prototype. -strstr