diff --git a/disassembler/x65dsasm.cpp b/disassembler/x65dsasm.cpp index 3af871c..e33a7e0 100644 --- a/disassembler/x65dsasm.cpp +++ b/disassembler/x65dsasm.cpp @@ -38,6 +38,7 @@ #include "struse.h" // https://github.com/Sakrac/struse/blob/master/struse.h #include #include +#include static const char* aAddrModeFmt[] = { "%s ($%02x,x)", // 00 @@ -73,6 +74,40 @@ static const char* aAddrModeFmt[] = { "%s $%04x", // 1b }; +static const char* aAddrModeFmtSrc[] = { + "%s (%s,x)", // 00 + "%s %s", // 01 + "%s #%s", // 02 + "%s %s", // 03 + "%s (%s),y", // 04 + "%s %s,x", // 05 + "%s %s,y", // 06 + "%s %s,x", // 07 + "%s (%s)", // 08 + "%s A", // 09 + "%s ", // 0a + "%s (%s)", // 0b + "%s (%s,x)", // 0c + "%s $%02x, %s", // 0d + "%s [%s]", // 0e + "%s [%s],y", // 0f + "%s %s", // 10 + "%s %s,x", // 11 + "%s %s,s", // 12 + "%s (%s,s),y", // 13 + "%s [%s]", // 14 + "%s $%02x,%s", // 15 + + "%s %s,y", // 16 + "%s (%s,y)", // 17 + + "%s #%s", // 18 + "%s #%s", // 19 + + "%s %s", // 1a + "%s %s", // 1b +}; + enum AddressModes { // address mode bit index @@ -902,26 +937,52 @@ struct dismnm a65816_ops[256] = { { "sbc", AM_ABS_L_X, 0x03 }, }; +enum RefType { + RT_NONE, + RT_BRANCH, // bne, etc. + RT_JUMP, // jmp + RT_JSR, // jsr + RT_DATA, // lda $... 
-void Disassemble(strref filename, unsigned char *mem, size_t bytes, bool acc_16, bool ind_16, int addr, const dismnm *opcodes) + RT_COUNT +}; + +const char *aRefNames[RT_COUNT] = { "???", "branch", "jump", "subroutine", "data" }; + +struct RefLink { + int instr_addr; + RefType type; +}; + +struct RefAddr { + int address; // address + std::vector *pRefs; // what is referencing this address + + RefAddr() : address(-1), pRefs(nullptr) {} + RefAddr(int addr) : address(addr), pRefs(nullptr) {} +}; + +static const strref _jsr("jsr"); +static const strref _jmp("jmp"); + +std::vector refs; + +static int _sortRefs(const void *A, const void *B) { - FILE *f = stdout; - bool opened = false; - if (filename) { - f = fopen(strown<512>(filename).c_str(), "w"); - if (!f) - return; - opened = true; - } + return ((const RefLink*)A)->instr_addr - ((const RefLink*)B)->instr_addr; +} - strref prev_src; - int prev_offs = 0; +void GetReferences(unsigned char *mem, size_t bytes, bool acc_16, bool ind_16, int addr, const dismnm *opcodes) +{ + int start_addr = addr; + int end_addr = addr + (int)bytes; + refs.push_back(RefAddr(start_addr)); + refs[0].pRefs = new std::vector(); - strown<256> out; while (bytes) { unsigned char op = *mem++; + int curr = addr; bytes--; - out.sprintf("$%04x ", addr); addr++; int arg_size = opcodes[op].arg_size;; @@ -934,65 +995,310 @@ void Disassemble(strref filename, unsigned char *mem, size_t bytes, bool acc_16, arg_size = ind_16 ? 
2 : 1; break; } - addr += arg_size; - if (arg_size > bytes) - return; - bytes -= arg_size; + int reference = -1; + RefType type = RT_NONE; - out.sprintf_append("%02x ", op); - for (int n = 0; n < arg_size; n++) - out.sprintf_append("%02x ", mem[n]); - - out.append_to(' ', 18); - - const char *fmt = aAddrModeFmt[mode]; - switch (mode) { - case AM_ABS: // 3 $1234 - case AM_ABS_Y: // 6 $1234,y - case AM_ABS_X: // 7 $1234,x - case AM_REL: // 8 ($1234) - case AM_REL_X: // c ($1234,x) - case AM_REL_L: // 14 [$1234] - out.sprintf_append(fmt, opcodes[op].name, (int)mem[0] | ((int)mem[1])<<8); - break; - - case AM_ABS_L: // 10 $bahilo - case AM_ABS_L_X: // 11 $123456,x - out.sprintf_append(fmt, opcodes[op].name, (int)mem[0] | ((int)mem[1])<<8 | ((int)mem[2])<<16); - break; - - case AM_IMM_DBL_A: // 18 #$12/#$1234 - case AM_IMM_DBL_I: // 19 #$12/#$1234 - if (arg_size==2) - out.sprintf_append("%s #$%04x", opcodes[op].name, (int)mem[0] | ((int)mem[1])<<8); - else - out.sprintf_append(fmt, opcodes[op].name, mem[0]); - break; - - case AM_BRANCH: // beq $1234 - out.sprintf_append(fmt, opcodes[op].name, addr + (char)mem[0]); - break; - - case AM_BRANCH_L: // brl $1234 - out.sprintf_append(fmt, opcodes[op].name, addr + ((short)(char)mem[0] + (((short)(char)mem[1])<<8))); - break; - - case AM_ZP_ABS: // d $12, *+$12 - out.sprintf_append(fmt, opcodes[op].name, mem[0], addr + (char)mem[1]); - break; - - default: - out.sprintf_append(fmt, opcodes[op].name, mem[0], mem[1]); - break; + if (mode == AM_BRANCH) { + reference = curr + 2 + (char)*mem; + type = RT_BRANCH; + } else if (mode == AM_BRANCH_L) { + reference = curr + 2 + (short)(unsigned short)mem[0] + ((unsigned short)mem[1]<<8); + type = RT_BRANCH; + } else if (mode == AM_ABS || mode == AM_ABS_Y || mode == AM_ABS_X || mode == AM_REL || mode == AM_REL_X || mode == AM_REL_L) { + reference = (unsigned short)mem[0] + ((unsigned short)mem[1]<<8); + if (_jsr.same_str(opcodes[op].name)) + type = RT_JSR; + else if 
(_jmp.same_str(opcodes[op].name)) + type = RT_JUMP; + else + type = RT_DATA; } + if (reference >= start_addr && reference <= end_addr && type != RT_NONE) { + bool found = false; + for (std::vector::iterator i = refs.begin(); i != refs.end(); ++i) { + if (i->address == reference) { + struct RefLink ref = { curr, type }; + i->pRefs->push_back(ref); + found = true; + break; + } + } + if (!found) { + refs.push_back(RefAddr(reference)); + struct RefAddr &last = refs[refs.size()-1]; + struct RefLink ref = { curr, type }; + last.pRefs = new std::vector(); + last.pRefs->push_back(ref); + } + } + + addr += arg_size; mem += arg_size; - out.append('\n'); - fputs(out.c_str(), f); + if (arg_size > (int)bytes) + break; + bytes -= arg_size; + } + if (refs.size()) + qsort(&refs[0], refs.size(), sizeof(RefAddr), _sortRefs); +} + +static const char spacing[] = " "; +void Disassemble(strref filename, unsigned char *mem, size_t bytes, bool acc_16, bool ind_16, int addr, const dismnm *opcodes, bool src) +{ + FILE *f = stdout; + bool opened = false; + if (filename) { + f = fopen(strown<512>(filename).c_str(), "w"); + if (!f) + return; + opened = true; + } + + const char *spc = src ? "" : spacing; + + strref prev_src; + int prev_offs = 0; + int end_addr = addr + (int)bytes; + + refs.clear(); + GetReferences(mem, bytes, acc_16, ind_16, addr, opcodes); + + int curr_label_index = 0; + bool values_data = false; + bool separator = false; + + strown<256> out; + while (bytes) { + + // update label index? 
+ while (curr_label_index < (int)(refs.size()-1) && addr >= refs[curr_label_index+1].address) { + curr_label_index++; + struct RefAddr &ref = refs[curr_label_index]; + if ((values_data || separator) && ref.pRefs && ref.pRefs->size() && (*ref.pRefs)[0].type == RT_DATA) { + values_data = true; + for (int j = 1; values_data && jsize(); j++) { + values_data = (*ref.pRefs)[0].type == RT_DATA; + } + } else + values_data = false; + } + // Determine if current address is referenced from somewhere + if (addr == refs[curr_label_index].address) { + struct RefAddr &ref = refs[curr_label_index]; + if (ref.pRefs) { + for (size_t j = 0; jsize(); ++j) { + if (src) { + struct RefLink &lnk = (*ref.pRefs)[j]; + int lbl = -1; + int prv_addr = 0; + int ref_addr = lnk.instr_addr; + for (size_t k = 0; kref_addr) + break; + lbl = (int)k; + prv_addr = refs[k].address; + } + out.sprintf("%s; Referenced from Label_%d + $%x (%s)\n", spc, lbl, ref_addr - prv_addr, aRefNames[(*ref.pRefs)[j].type]); + } else + out.sprintf("%s; Referenced from $%04x (%s)\n", spc, (*ref.pRefs)[j].instr_addr, aRefNames[(*ref.pRefs)[j].type]); + fputs(out.c_str(), f); + } + } + out.sprintf("%sLabel_%d:\n", spc, curr_label_index); + fputs(out.c_str(), f); + } + if (src && values_data) { + out.clear(); + int left = end_addr - addr; + if (curr_label_index < (int)(refs.size()-2)) + left = refs[curr_label_index+1].address - addr; + for (int i = 0; i (int)bytes) + return; + bytes -= arg_size; + + if (!src) { + out.sprintf_append("%02x ", op); + for (int n = 0; n < arg_size; n++) + out.sprintf_append("%02x ", mem[n]); + } + + out.append_to(' ', src ? 
2 : 18); + + int reference = -1; + separator = false; + if (op == 0x60) { // rts + separator = true; + for (size_t i = 0; i &pRefs = *refs[i].pRefs; + if (refs[i].address<=curr_addr) { + for (size_t j = 0; jcurr_addr) + separator = false; + } + } else { + for (size_t j = 0; j lblname; + if (reference>=0) { + for (size_t i = 0; i=0; --i) { + if (refs[i].pRefs) + delete refs[i].pRefs; + refs.erase(refs.begin() + i); + } + } @@ -1003,10 +1309,11 @@ int main(int argc, char **argv) int skip = 0; int end = 0; int addr = 0x1000; - bool acc_16 = false; - bool ind_16 = false; + bool acc_16 = true; + bool ind_16 = true; + bool src = false; - const dismnm *opcodes = a65816_ops; + const dismnm *opcodes = a6502_ops; for (int i = 1; i < argc; i++) { strref arg(argv[i]); @@ -1022,7 +1329,9 @@ int main(int argc, char **argv) } } else { strref var = arg.split_token('='); - if (!arg) { + if (var.same_str("src")) + src = true; + else if (!arg) { if (!bin) bin = argv[i]; else if (!out) @@ -1062,10 +1371,18 @@ int main(int argc, char **argv) size_t bytes = size - skip; if (end && bytes > size_t(end - skip)) bytes = size_t(end - skip); - Disassemble(out, mem + skip, bytes, acc_16, ind_16, addr, opcodes); + Disassemble(out, mem + skip, bytes, acc_16, ind_16, addr, opcodes, src); } free(mem); } + } else { + puts("Usage:\nx65dsasm binary disasm.txt [$skip[-$end]] [addr=$xxxx] [cpu=6502/65C02/65816] [mx=0-3] [src]\n" + " * binary: file which contains some 65xx series instructions\n" + " * disasm.txt: output file (default is stdout)\n" + " * $skip-$end: first byte offset to disassemble to last byte offset to disassemble\n" + " * addr: disassemble as if loaded at addr\n" + " * cpu: set which cpu to disassemble for (default is 6502)\n" + " * mx: set the mx flags which control accumulator and index register size\n"); } return 0; } \ No newline at end of file diff --git a/dump_x65/dump_x65.cpp b/dump_x65/dump_x65.cpp index 6d66e7c..c040c23 100644 --- a/dump_x65/dump_x65.cpp +++ 
b/dump_x65/dump_x65.cpp @@ -332,7 +332,7 @@ int main(int argc, char **argv) } if (!file) { - printf("Usage:\ndump_x65 filename [-sections] [-relocs] [-labels] [-map] [-late_eval[\n"); + printf("Usage:\ndump_x65 filename [-sections] [-relocs] [-labels] [-map] [-late_eval] [-code]\n"); return 0; } diff --git a/struse.h b/struse.h new file mode 100644 index 0000000..2b83339 --- /dev/null +++ b/struse.h @@ -0,0 +1,4617 @@ +/* +String User Classes + +The MIT License (MIT) + +Copyright (c) 2015 Carl-Henrik Skårstedt + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software +and associated documentation files (the "Software"), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, publish, distribute, +sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or +substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE +FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +https://github.com/sakrac/struse + +Add this #define to *one* C++ file before #include "struse.h" to create the implementation: + +#define STRUSE_IMPLEMENTATION + +// in other words, this sequence should be at the top of one file: +#include ... 
+#define STRUSE_IMPLEMENTATION +#include "struse.h" +*/ + +#ifndef __STRUSE_H__ +#define __STRUSE_H__ + +#include // memcpy, memmove +#include // printf, vsnprintf +#include // va_list + +// +// Naming Rules: +// +// find* returns a position of a string where a match if found or -1 if not +// skip*/clip* trims beginning or end of string based on condition +// get_* returns a property, single character or substring +// before*/after* returns a substring matching the condition +// is_* returns a bool for a character or whole string test +// len_* return the number of characters matching the condition from the start of the string +// *_last indicates that search is done from the end of the string +// *_rh indicates a rolling hash search is used +// *_esc indicates that the search string allows for escape codes (\x => x) +// same_str* is a full string compare and returns true or false based on condtions +// *_range indicates that the result is filtered by a given set of valid characters +// + +typedef unsigned int strl_t; + +// helper defines for sprintf/printf with strop +// example: printf("string is " STROP_FMT "\n", STROP_ARG(strref)) +#define STRREF_FMT "%.*s" +#define STRREF_ARG(s) (int)(s).get_len(), (s).get() + +// internal helper functions for strref +int _find_rh(const char *text, strl_t len, const char *comp, strl_t comp_len); +int _find_rh_case(const char *text, strl_t len, const char *comp, strl_t comp_len); + +// strref holds a reference to a constant substring (const char*) +class strref { +protected: + const char *string; + strl_t length; + +public: + strref() { clear(); } + strref(const char *str); + strref(const char *str, strl_t len) : string(str), length(len) {} + strref(const char *str, int len) : string(str), length(strl_t(len)) {} + + bool valid() const { return string && length; } + void clear() { string = nullptr; length = 0; } + const char* get() const { return string; } + strl_t get_len() const { return length; } + char get_first() const { 
return (string && length) ? *string : 0; } + char get_last() const { return (string && length) ? string[length-1] : 0; } + + strl_t limit_pos(strl_t pos) { return pos=string && sub<=(string+length); } + strl_t substr_offs(strref substr) const { + if (is_substr(substr.get())) return strl_t(substr.get()-get()); return 0; } + strl_t substr_end_offs(strref substr) const { + if (is_substr(substr.get())) return strl_t(substr.get()-get()) + substr.get_len(); return 0; } + bool is_empty() const { return length==0; } + + // get fnv1a hash for string + unsigned int fnv1a(unsigned int seed = 2166136261) const; + unsigned int fnv1a_lower(unsigned int seed = 2166136261) const; + unsigned int fnv1a_append(unsigned int base_fnv1a_hash) const { return fnv1a(base_fnv1a_hash); } + + // whitespace ignore fnv1a (any sequence whitespace is replaced by one space) + unsigned int fnv1a_ws(unsigned int seed = 2166136261) const; + + // convert string to basic integer + int atoi() const; + + // convert string to floating point + float atof() const; + double atod() const; + + // number of characters of basic integer in string + int atoi_skip(); + + // convert hexadecimal string to signed integer + int ahextoi() const; + + // convert hexadecimal string to unsigned integer + unsigned int ahextoui() const; + unsigned int ahextoui_skip(); + unsigned int abinarytoui_skip(); + + // output string with newline (printf) + void writeln(); + + // is character empty such as space, tab, linefeed etc.? + static bool is_ws(unsigned char c) { return c <= ' '; } + + // is character a number? + static bool is_number(unsigned char c) { return c>='0' && c<='9'; } + + // is character a hexadecimal number? + static bool is_hex(char c) { return is_number(c) || (c>='A' && c<='F') || (c>='a' && c<='f'); } + + // is character alphabetic (A-Z or a-z)? + static bool is_alphabetic(unsigned char c) { return (c>='a' && c<='z') || (c>='A' && c<='Z'); } + + // is character alphabetic or numeric? 
+ static bool is_alphanumeric(char c) { return is_number(c) || is_alphabetic(c); } + + // is character valid as part of a label? (num, _, A-Z, a-z) + static bool is_valid_label(unsigned char c) { return c=='_' || is_alphanumeric(c); } + + // word separators are non-alphanumeric characters except apostrophe. + static bool is_sep_ws(unsigned char c) { return c!='\'' && !is_alphanumeric(c); } + + // is control character? (!-/, ?-@, [-^, {-~) + static bool is_control(unsigned char c) { return !is_ws(c) && !is_alphanumeric(c) && c!='_'; } + + // choice of upper/lowercase conversions + static char tolower(char c); + static char toupper(char c); + static char tolower_win(char c); + static char toupper_win(char c); + static char tolower_amiga(char c); + static char toupper_amiga(char c); + static char tolower_macos(char c); + static char toupper_macos(char c); + static int tolower_unicode(int c); + static int toupper_unicode(int c); + + // operators + // strref += int: move string forward (skip) + void operator+=(const strl_t skip) { if (skip=0 && strl_t(skip) strref / strref < strref: greater than and lesser than operators + bool operator>(const strref o) const; + bool operator<(const strref o) const; + + // strref[int]: get character at position + char operator[](unsigned int pos) const { return pos 0; } + + // suffix compare + strl_t suffix_len(const strref str) const; + strl_t suffix_len_case(const strref str) const; + bool is_suffix_of(const strref str) const { return suffix_len(str)==get_len(); } + bool is_suffix_case_of(const strref str) { return suffix_len_case(str)==get_len(); } + bool has_suffix(const char *str) const { return strref(str).is_suffix_of(*this); } + + // whole word compare (prefix match + next char is whitespace or end of string) + bool is_word(const strref str) const { return prefix_len(str)==str.get_len() && whitespace_at(str.get_len()); } + bool is_word_case(const strref str) const { return prefix_len_case(str)==str.get_len() && 
whitespace_at(str.get_len()); } + + // string search + + // find first index of char c + int find(char c) const; + + // find first index of char c after pos + int find_at(char c, strl_t pos) const; + + // find first index of char c after index pos (find_at(c, pos+1)) + int find_after(char c, strl_t pos) const; + + // find first index of char c after index pos or return length for full string + int find_or_full(char c, strl_t pos) const; + int find_or_full_esc(char c, strl_t pos) const; + + // find last position of char c + int find_last(char c) const; + + // find first position of either char c or char d + int find(char c, char d) const; + + // find last position of either char c or char d + int find_last(char c, char d) const; + + // find first after last in string + int find_after_last(char a, char b) const { return find_after(b, find_last(a)+1); } + int find_after_last(char a1, char a2, char b) const { + int w = find_last(a1, a2)+1; int l = strref(string+w, length-w).find(b); return l>=0?l+w:-1; } + + // return position in this string of the first occurrence of the argument or negative if not found, not case sensitive + int find(const strref str) const; + int find_bookend(const strref str, const strref bookend) const; + + // return position in this string of the first occurrence of the argument or negative if not found, not case sensitive + int find(const char *str) const; + + // return position in this string of the first occurrence of the argument or negative if not found, case sensitive + int find_case(const strref str, strl_t pos = 0) const; + + // return position in this string of the first occurrence of the argument or negative if not found, case sensitive + int find_case(const char *str) const; + int find_case_esc(const strref str, strl_t pos) const; + int find_case_esc_range(const strref str, const strref range, strl_t pos) const; + int find_esc_range(const strref str, const strref range, strl_t pos) const; + + // return position in this string of the 
last occurrence of the argument or negative if not found, not case sensitive + int find_last(const strref str) const; + int find_last_bookend(const strref str, const strref bookend) const; + + // return position in this string of the last occurrence of the argument or negative if not found, not case sensitive + int find_last(const char *str) const; + + // return position in this string of the last occurrence of the argument or negative if not found, case sensitive + int find_last_case(const strref str) const; + + // find first instance after pos + int find(const strref str, strl_t pos) const; + + // find first instance after pos allowing escape codes in search string + int find_esc(const strref str, strl_t pos) const; + + // find any char from str in this string at position + int find_any_char_of(const strref range, strl_t pos = 0) const; + + // find any char from str or char range or char - with backslash prefix + int find_any_char_or_range(const strref range, strl_t pos = 0) const; + int find_any_not_in_range(const strref range, strl_t pos = 0) const; + + // find any char from str or char range or char - with backslash prefix + int find_range_char_within_range(const strref range_find, const strref range_within, strl_t pos = 0) const; + + // counts + int substr_count(const strref str) const; // count the occurrences of the argument in this string + int substr_count_bookend(const strref str, const strref bookend) const; + int substr_case_count(const strref str) const; // count the occurrences of the argument in this string + int substr_label_case_count(const strref str) const; + int count_repeat(char c, strl_t pos) const; + int count_repeat_reverse(char c, strl_t pos) const; + int count_lines() const; + int count_lines(strl_t pos) const { return strref(string, pos=0 && strl_t(len)0?string:nullptr, + len>0?(strl_t(len)='A')) + break; l--; } return strref(string, length-l); } + + strref before(char c) const { + int o = find(c); if (o>=0) return strref(string, o); 
return strref(); } + + strref before(char c, char d) const { + int o = find(c, d); if (o>=0) return strref(string, o); return strref(); } + + strref before_or_full(char c) const { + int o = find(c); if (o>=0) return strref(string, o); return *this; } + + strref before_last(char c) const { + int o = find_last(c); if (o>=0) return strref(string, o); return strref(); } + + strref before_last(char c, char d) const { + int o = find_last(c, d); if (o>=0) return strref(string, o); return strref(); } + + strref before_or_full(const strref str) const { + int o = find(str); if (o<0) return *this; return strref(string, o); } + + strref after_or_full(const strref str) const { + int o = find(str); if (o<0) return *this; return strref(string+o, length-o); } + + strref after_or_full(char c) const { int o = find(c); + if (o>=0) return strref(string+o+1, length-o-1); return *this; } + + strref after_or_full(char c, char d) const { int o = find(c, d); + if (o>=0) return strref(string+o+1, length-o-1); return *this; } + + strref after(char c) const { int o = find(c); + if (o>=0) return strref(string+o+1, length-o-1); return strref(); } + + strref after_last_or_full(char c) const { int o = find_last(c); + if (o>=0) return strref(string+o+1, length-o-1); return *this; } + + strref after_last_or_full(char c, char d) const { + int o = find_last(c, d); if (o>=0) return strref(string+o+1, length-o-1); return *this; } + + strref after_last(char c) const { int o = find_last(c); if (o>=0) + return strref(string+o+1, length-o-1); return strref(); } + + strref get_alphanumeric() const { strref r(*this); r.skip_whitespace(); + if (int l = r.len_alphanumeric()) return strref(string, l); return strref(); } + + strref get_label() const { return strref(string, len_label()); } + + strref before_or_full_case(const strref str) const { int o = find_case(str); + if (o<0) return *this; return strref(string, o); } + + strref after_or_full_case(const strref str) const { int o = find_case(str); + if (o<0) 
return *this; return strref(string+o, length-o); } + + strref between(char c, char d) { int s = find(c); if (s>=0) { int e = find_after(d, s); + if (e>=0) return get_substr(s+1, e-s-1); } return strref(); } + + // tokenization + strref split(strl_t pos) { pos = limit_pos(pos); strref ret = strref(string, pos); *this += pos; return ret; } + strref split_token(char c) { int t = find(c); if (t<0) t = length; strref r = strref(string, t); *this += t+1; return r; } + strref split_token_any(const strref chars) { strref r; int t = find_any_char_of(chars); + if (t>=0) { r = strref(string, t); *this += t; } return r; } + strref split_token_trim(char c) { strref r = split_token(c); skip_whitespace(); r.trim_whitespace(); return r; } + strref split_token_any_trim(const strref chars) { int t = find_any_char_of(chars); + if (t<0) t = length; strref r = strref(string, t); *this += t+1; r.trim_whitespace(); return r; } + strref split_range(const strref range, strl_t pos=0) { int t = find_any_char_or_range(range, pos); + if (t<0) t = length; strref r = strref(string, t); *this += t; return r; } + strref split_range_trim(const strref range, strl_t pos=0) { int t = find_any_char_or_range(range, pos); + if (t<0) t = length; strref r = strref(string, t); *this += t; r.trim_whitespace(); trim_whitespace(); return r; } + strref split_label() { skip_whitespace(); strref r(string, len_label()); *this += r.length; skip_whitespace(); return r; } + + // grab a block of text starting with (, [ or { and end with the corresponding number of ), ] or } + strref scoped_block_skip(); + + strref get_line() const; // return the current line even if empty,t don't change this line + strref get_line(strl_t line) const; // return line by index + strref next_line(); // return the current line even if empty and skip this to line after + strref line() { strref ret; while (valid() && !ret.valid()) ret = next_line(); return ret;} // return the current or next valid line skip this to line after + strref 
next_token(char c) { int o = find(c); if (o<0) o = get_len(); return split(o); } + strref token_chunk(char c) const { int o = find(c); if (o<0) return *this; return strref(string, o); } + void token_skip(const strref chunk) { skip(chunk.length+1); } + strref find_token(const char *substr, char token) const; + strref find_token_case(const char *substr, char token) const; + strref find_token(strref substr, char token) const; + strref find_token_case(strref substr, char token) const; + strref within_last(char a, char b) const { int f = find_last(a)+1; + int l = strref(string+f, length-f).find(b); if (l<0) l = 0; return strref(string+f, l); } + strref within_last(char a1, char a2, char b) const { int f = find_last(a1, a2)+1; + int l = strref(string+f, length-f).find(b); if (l<0) l = 0; return strref(string+f, l); } + + strref get_quote_xml() const; + int find_quoted_xml(char d) const; // returns length up to the delimiter d with xml quotation rules, or -1 if delimiter not found + int find_quoted(char d) const; // returns length up to the delimiter d with c/c++ quotation rules, or -1 if delimiter not found + + strref next_chunk_xml(char open, char close) const { int s = find_quoted_xml(open); + if (s<0) return strref(); strref left = get_skipped(s+1); return left.get_clipped(left.find_quoted_xml(close)); } + strref next_chunk_quoted(char open, char close) const { int s = find_quoted(open); + if (s<0) return strref(); strref left = get_skipped(s+1); return left.get_clipped(left.find_quoted(close)); } + void skip_chunk(const strref chunk) { strl_t add = strl_t(chunk.string-string)+chunk.length+1UL; + if (add class strmod : public B { + // mirror base class unsafe size operations (doesn't check capacity) + void add_len_int(strl_t l) { B::add_len_int(l); } + void sub_len_int(strl_t l) { B::sub_len_int(l); } + void set_len_int(strl_t l) { B::set_len_int(l); } + void dec_len_int() { B::dec_len_int(); } + void inc_len_int() { B::inc_len_int(); } +public: + strmod() { clear(); 
} + explicit operator strref() { return strref(charstr(), len()); } + + // mirror base template class + strl_t cap() const { return B::cap(); } + char* charstr() { return B::charstr(); } + const char* charstr() const { return B::charstr(); } + strl_t len() const { return B::len(); } + + // get a strref version of this string + strref get_strref() { return strref(charstr(), len()); } + strref get_strref() const { return strref(charstr(), len()); } + strl_t get_len() const { return B::len(); } + + // basic tests and operations + void clear() { set_len_int(0); } + bool valid() const { return charstr() && len(); } + operator bool() const { return valid(); } + bool empty() const { return !len(); } + bool full() const { return len() == cap(); } + const char* get() const { return charstr(); } + char get_first() const { return (charstr() && len()) ? *charstr() : 0; } + char get_last() const { return (charstr() && len()) ? charstr()[len()-1] : 0; } + void copy(strref o) { set_len_int(_strmod_copy(charstr(), cap(), o)); } + bool is_substr(const char *sub) const { return sub>=charstr() && sub<=(charstr()+len()); } + + // public size operators (checks for capacity) + strl_t fit_add(strl_t desired) { return (desired+len()) < cap() ? 
desired : (cap()-len()); } + bool set_len(strl_t l) { if (l<=cap()) { set_len_int(l); return true; } set_len_int(cap()); return false; } + void add_len(strl_t l) { add_len_int(fit_add(l)); } + + // offset operators will always return a strref + strref operator+(const strl_t skip) { if (skip=0 && strl_t(skip)= pos) set_len_int(pos); else { + strl_t ol = len(); set_len(pos); for (strl_t p = ol; p < len(); ++p) charstr()[p] = c; } } + + // prepend this string with a substring + void prepend(const strref o) { insert(o, 0); } + + // prepend this string with a c string + void prepend(const char *s) { insert(strref(s), 0); } + + // format this string using {n} notation to index into the args list + void format(const strref format, const strref *args) { + set_len_int(_strmod_format_insert(charstr(), 0, cap(), 0, format, args)); } + + // append a formatted string, return the appended part as a strref + strref format_append(const strref format, const strref *args) { strl_t l = len(); + set_len_int(_strmod_format_insert(charstr(), len(), cap(), len(), format, args)); + return strref(charstr()+l, len()-l); } + + // prepend a formatted string, return the prepend part as a strref + strref format_prepend(const strref format, const strref *args) { strl_t l = len(); + set_len_int(_strmod_format_insert(charstr(), len(), cap(), 0, format, args)); + return strref(charstr(), len()-l); } + + // insert a formatted string + void format_insert(const strref format, const strref *args, strl_t pos) { + set_len_int(_strmod_format_insert(charstr(), len(), cap(), pos, format, args)); } + + // c style sprintf (work around windows _s preference) +#ifdef _WIN32 + int sprintf(const char *format, ...) { va_list args; va_start(args, format); + set_len_int(vsnprintf_s(charstr(), cap(), _TRUNCATE, format, args)); va_end(args); return len(); } + int sprintf_at(strl_t pos, const char *format, ...) 
{ va_list args; va_start(args, format); + int l = vsnprintf_s(charstr()+pos, cap()-pos, _TRUNCATE, format, args); + if (l+pos>len()) set_len(l+pos); va_end(args); return l; } + int sprintf_append(const char *format, ...) { va_list args; va_start(args, format); + int l = vsnprintf_s(end(), cap()-len(), _TRUNCATE, format, args); va_end(args); add_len_int(l); return l; } +#else + int sprintf(const char *format, ...) { va_list args; va_start(args, format); + set_len_int(vsnprintf(charstr(), cap(), format, args)); va_end(args); return len(); } + int sprintf_at(strl_t pos, const char *format, ...) { va_list args; va_start(args, format); + int l = vsnprintf(charstr()+pos, cap()-pos, format, args); + if (l+pos>len()) set_len(l+pos); va_end(args); return l; } + int sprintf_append(const char *format, ...) { va_list args; va_start(args, format); + int l = vsnprintf(end(), cap()-len(), format, args); va_end(args); add_len_int(l); return l; } +#endif + // replace instances of character c with character d + strref replace(char c, char d) { if (char *b = charstr()) { + for (strl_t i = len(); i; i--) { if (*b==c) *b = d; b++; } } return get_strref(); } + + // replace instances of substring a with substring b + strref replace(const strref a, const strref b) { + set_len(_strmod_inplace_replace_int(charstr(), len(), cap(), a, b)); return get_strref(); } + + // replace strings bookended by a specific string + strref replace_bookend(const strref a, const strref b, const strref bookend) { if (len() && get() && a && bookend) + set_len(_strmod_inplace_replace_bookend_int(charstr(), len(), cap(), a, b, bookend)); return get_strref(); } + + // replace a string found within this string with another string + void exchange(strl_t pos, strl_t size, const strref insert) { + set_len_int(_strmod_exchange(charstr(), len(), cap(), pos, size, insert)); } + + void exchange(const strref original, const strref insert) { + if (is_substr(original.get())) { exchange(strl_t(original.get()-get()), 
original.get_len(), insert); } } + + // remove a part of this string + strref remove(strl_t start, strl_t length) { + set_len_int(_strmod_remove(charstr(), len(), cap(), start, length)); + return get_strref(); return get_strref(); } + + // remove all instances of a character from this string + strl_t remove(char a) { set_len_int(_strmod_remove(charstr(), len(), cap(), a)); + return get_strref(); } + + // zero terminate this string and return it + const char *c_str() { charstr()[len()len()) + length = len()-pos; if (length) { for (strl_t i = 0; i class strown_base { + char string[S]; + strl_t length; +protected: + void add_len_int(strl_t l) { length += l; } // unsafe add len (size already checked) + void sub_len_int(strl_t l) { length -= l; } // unsafe sub len (size already checked) + void set_len_int(strl_t l) { length = l; } + void dec_len_int() { length--; } + void inc_len_int() { length++; } +public: + strl_t cap() const { return S; } + char* charstr() { return string; } + const char* charstr() const { return string; } + strl_t len() const { return length; } +}; + +class strovl_base { +protected: + char *string_ptr; + strl_t string_length; + strl_t string_space; + void add_len_int(strl_t l) { string_length += l; } // unsafe add len (size already checked) + void sub_len_int(strl_t l) { string_length -= l; } // unsafe sub len (size already checked) + void set_len_int(strl_t l) { string_length = l; } + void dec_len_int() { string_length--; } + void inc_len_int() { string_length++; } +public: + strl_t cap() const { return string_space; } + strl_t len() const { return string_length; } + char *charstr() { return string_ptr; } + const char* charstr() const { return string_ptr; } + void invalidate() { string_ptr = nullptr; string_space = 0; } + void set_overlay(char *ptr, strl_t space) { string_ptr = ptr; string_space = space; } + void set_overlay(char *ptr, strl_t space, strl_t len) { + string_ptr = ptr; string_space = space; string_length = len; } +}; + +// owned 
string class, instance with 'strown name' +template class strown : public strmod > { +public: + strown(const char *s) { strmod >::copy(s); } + explicit strown(strref s) { strmod >::copy(s); } + strown() {} +}; + +// overlay string class, instance with 'strovl name(char *, size)' +class strovl : public strmod { +public: + strovl() { invalidate(); string_length = 0; } + strovl(char *ptr, strl_t space) { set_overlay(ptr, space); string_length = 0; } + strovl(char *ptr, strl_t space, strl_t length) { set_overlay(ptr, space); string_length = length; } +}; + + +// helper for relative strings. purpose is for string collections that may need to grow +// by allocating a new buffer and copying. requires calling get(base strref) tp use string. +class strref_rel { +protected: + strl_t offset; + strl_t length; +public: + strref_rel() { clear(); } + strref_rel(const strref_rel &rel) : offset(rel.offset), length(rel.length) {} + strref_rel(strref orig, strref base) { + if (base.is_substr(orig.get())) { + offset = strl_t(orig.get()-base.get()); length = orig.get_len(); + } else + length = 0; + } + strref_rel(strref orig, strovl base) : strref_rel(orig, base.get_strref()) {} + strref_rel(const char *str, strl_t len, strref base) { + if (base.is_substr(str)) { + offset = strl_t(str-base.get()); length = len; + } else + length = 0; + } + + strref get(strref base) { return strref(base.get() + offset, length); } + strref get(strovl base) { return strref(base.get() + offset, length); } + strl_t get_len() { return length; } + + bool valid() const { return length>0; } + operator bool() const { return valid(); } + + void clear() { length = 0; } +}; + + + +// dynamic collection of strings in a single fixed char array +template class strcol { + char _buffer[S]; + strl_t end_buf; + char* push_back_len(char *w, char *e, strl_t len) { while (w>= 7; } return w; } + char* push_back_int(const char *s, strl_t l, strl_t o) { char *e = _buffer+S, *w = push_back_len(_buffer+o, e, l); if 
// Lowercase mapping for the upper half (0x80-0xff) of Mac OS Roman.
// Indexed with (c & 0x7f); entries without a lowercase form map to themselves.
// All 16 rows must be present: a missing row would be zero-filled and map
// those characters to 0.
static const unsigned char _aMacOSRomanHigh_ToLower[0x80] = {
	0x8a, 0x8c, 0x8d, 0x8e, 0x96, 0x9a, 0x9f, 0x87,	// 0x80
	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,	// 0x88
	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,	// 0x90
	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,	// 0x98
	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,	// 0xa0
	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xbe, 0xbf,	// 0xa8 (0xae, 0xaf -> 0xbe, 0xbf)
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,	// 0xb0
	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,	// 0xb8
	0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,	// 0xc0
	0xc8, 0xc9, 0xca, 0x88, 0x8b, 0x9b, 0xcf, 0xcf,	// 0xc8 (0xcc -> 0x8b, the inverse of ToUpper[0x8b])
	0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,	// 0xd0
	0xd8, 0xd8, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,	// 0xd8 (0xd9 -> 0xd8)
	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0x89, 0x90, 0x87,	// 0xe0
	0x91, 0x8f, 0x92, 0x94, 0x95, 0x93, 0x97, 0x99,	// 0xe8
	0xf0, 0x98, 0x9c, 0x9e, 0x9d, 0xf5, 0xf6, 0xf7,	// 0xf0
	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff };	// 0xf8
// Uppercase a character from the "windows extended ascii" set.
// 'a'-'z' are converted arithmetically; six accented lowercase characters are
// mapped to their uppercase code. Each pair is the exact inverse of the pair
// handled by int_tolower_win_ascii (0x8e<->0x84, 0x8f<->0x86, 0x90<->0x82,
// 0x92<->0x91, 0x99<->0x94, 0x9a<->0x81). Everything else is returned as is.
unsigned char int_toupper_win_ascii(unsigned char c) {
	if (c<'a')
		return c;
	if (c<='z')
		return c+'A'-'a';
	switch (c) {
		case 0x84:
			return 0x8e;	// was 0x83, which is not the inverse of tolower's 0x8e -> 0x84
		case 0x86:
			return 0x8f;
		case 0x82:
			return 0x90;
		case 0x91:
			return 0x92;
		case 0x94:
			return 0x99;
		case 0x81:
			return 0x9a;
	}
	return c;
}
// General uppercase of unicode range
// Mirrors int_tolower_unicode: Latin-1, Latin Extended-A, and the Greek,
// Cyrillic and Armenian ranges handled there. Code points without a mapping
// are returned unchanged.
unsigned int int_toupper_unicode(unsigned int c)
{
	// below 'a' nothing needs converting; 0xd7/0xf7 (multiply/divide signs),
	// 0x138 and 0x149 have no case counterpart at the usual offset
	if (c<'a' || c==0xd7 || c==0xf7 || c==0x138 || c==0x149)
		return c;
	if (c<='z')
		return c+'A'-'a';
	if (c<0xe0)
		return c;
	if (c==0xff)
		return 0x178;
	// ranges where uppercase is 0x20 below lowercase
	if (c<0x100 || (c>=0x3b1 && c<0x3cb) || (c>=0x3f8 && c<0x410) || (c>=0x430 && c<0x450))
		return c-0x20;
	if (c<0x178) {
		// 0x139-0x148: uppercase is the odd code point of each pair
		if (c>0x138 && c<0x149)
			return ((c-1)&~1u)+1;
		// otherwise uppercase is the even code point of each pair
		return c & ~1u;
	}
	if ((c>=0x460 && c<0x482) || (c>=0x48a && c<0x4c0) || (c>=0x4d0 && c<0x530))
		return c & ~1u;
	// 0x4c1-0x4ce: uppercase is the odd code point of each pair
	if (c>=0x4c1 && c<0x4cf)
		return ((c-1)&~1u)+1;
	if (c>=0x450 && c<0x460)
		return c-0x50;

	if (c>=0x561 && c<0x586)
		return c-0x30;

	if (c==0x4cf)
		return 0x4c0;

	return c;
}
left, unsigned char &code) +{ + strl_t step = 0; + if (!left) + return step; + char c = *buf++; + left--; + step++; + if (c=='x' && left && strref::is_hex(*buf)) { + // parse hex char code + c = 0; + for (int r = 0; r<2 && left; r++) { + char n = *buf++; + if (!strref::is_hex(n)) + break; + c = (c<<4) + n - (n<='9' ? '0' : (n<='F'?('A'-0xA) : ('a'-0xa))); + step++; + left--; + } + } else if (c>='0' && c<='7') { + // parse octal char code + c -= '0'; + for (int r = 0; r<2 && left; r++) { + char n = *buf++; + if (n<'0' || n>'7') + break; + c = c*8 + n-'0'; + step++; + left--; + } + } else { + // check for custom escape code symbol + switch (c) { + case 'a': + c = 7; + break; + case 'b': + c = 8; + break; + case 'f': + c = 12; + break; + case 'n': + c = 10; + break; + case 'r': + c = 13; + break; + case 't': + c = 9; + break; + case 'v': + c = 11; + break; + } + } + code = c; + return step; +} + +// tolower/toupper implementation +char strref::tolower(char c) { return int_tolower_ascii7(c); } +char strref::toupper(char c) { return int_toupper_ascii7(c); } +char strref::tolower_win(char c) { return int_tolower_win_ascii(c); } +char strref::toupper_win(char c) { return int_toupper_win_ascii(c); } +char strref::tolower_amiga(char c) { return int_tolower_amiga_ascii(c); } +char strref::toupper_amiga(char c) { return int_toupper_amiga_ascii(c); } +char strref::tolower_macos(char c) { return int_tolower_macos_roman_ascii(c); } +char strref::toupper_macos(char c) { return int_toupper_macos_roman_ascii(c); } +int strref::tolower_unicode(int c) { return int_tolower_unicode(c); } +int strref::toupper_unicode(int c) { return int_toupper_unicode(c); } + +// use printf to print current string on a single line +void strref::writeln() +{ + if (valid()) { + printf(STRREF_FMT "\n", STRREF_ARG(*this)); + } else + printf("\n"); +} + +// construct a strref from const char* +strref::strref(const char *str) +{ + if (!str || !*str) { + string = nullptr; + length = 0; + } else { + string = 
str; + strl_t l = 0; + while (*str++) + l++; + length = l; + } +} + +// get fnv1a hash of a string +unsigned int strref::fnv1a(unsigned int seed) const +{ + unsigned int hash = seed; + if (string) { + unsigned const char *scan = (unsigned const char*)string; + strl_t left = length; + while (left--) + hash = (*scan++ ^ hash) * 16777619; + } + return hash; +} + +// get lowercase fnv1a hash of a string +unsigned int strref::fnv1a_lower(unsigned int seed) const +{ + unsigned const char *scan = (unsigned const char*)string; + unsigned int hash = seed; + strl_t left = length; + while (left--) + hash = (int_toupper_ascii7(*scan++) ^ hash) * 16777619; + return hash; +} + +// get fnv1a hash of a string and treat any number whitespace as a single space +unsigned int strref::fnv1a_ws(unsigned int seed) const +{ + unsigned const char *scan = (unsigned const char*)string; + unsigned int hash = seed; + strl_t left = length; + while (left--) { + unsigned char c = *scan++; + if (c<' ') + c = ' '; + hash = (*scan++ ^ hash) * 16777619; + if (c==' ') { + while (left && *scan<=0x20) { + left--; + scan++; + } + } + } + return hash; +} + +// convert numeric string to integer +int strref::atoi() const +{ + if (!string) + return 0; + const char *s = string; + strl_t l = length; + while (l && s && *s<=0x20) { + s++; + l--; + } + if (!l) + return 0; + bool neg = false; + if (*s=='-') { + neg = true; + l--; + s++; + } + int v = 0; + while (l) { + char c = *s++; + l--; + if (c<'0' || c>'9') + break; + v = c-'0' + v*10; + } + return neg ? 
-v : v; +} + +// convert numeric string into floating point value +float strref::atof() const { + if (string[length]==0) + return (float)::atof(string); + strown<64> num(*this); + return (float)::atof(num.c_str()); +} + +// convert numeric string into double precision floating point value +double strref::atod() const { + if (string[length]==0) + return ::atof(string); + strown<64> num(*this); + return ::atof(num.c_str()); +} + +// convert numeric string to integer and move string forward +int strref::atoi_skip() +{ + const char *scan = string; + strl_t left = length; + while (*scan<=0x20 && left) { + scan++; + left--; + } + if (!left) + return 0; + bool neg = false; + if (*scan=='-') { + neg = true; + left--; + } + int value = 0; + while (left) { + char c = *scan; + if (c<'0' || c>'9') + break; + left--; + scan++; + value = c-'0' + value*10; + } + string += length-left; + length = left; + return neg ? -value : value; +} + +// convert a hexadecimal string to an unsigned integer +unsigned int strref::ahextoui() const +{ + const char *scan = string; + strl_t left = length; + while (*scan<=0x20 && left) { + scan++; + left--; + } + if (!left) + return 0; + if (left>2 && *scan=='0' && (scan[1]=='x' || scan[1]=='X')) { + scan += 2; + left -= 2; + } + strl_t hex = 0; + while (left) { + char c = *scan++; + left--; + if (c>='0' && c<='9') + hex = (hex<<4) | (c-'0'); + else if (c>='a' && c<='f') + hex = (hex<<4) | (c-'a'+10); + else if (c>='A' && c<='F') + hex = (hex<<4) | (c-'A'+10); + else + break; + } + return hex; +} + +// convert a hexadecimal string to an unsigned integer +unsigned int strref::ahextoui_skip() +{ + const char *scan = string; + strl_t left = length; + while (*scan<=0x20 && left) { + scan++; + left--; + } + if (!left) + return 0; + if (left>2 && *scan=='0' && (scan[1]=='x' || scan[1]=='X')) { + scan += 2; + left -= 2; + } + strl_t hex = 0; + while (left) { + char c = *scan; + if (c>='0' && c<='9') + hex = (hex<<4) | (c-'0'); + else if (c>='a' && c<='f') + 
hex = (hex<<4) | (c-'a'+10); + else if (c>='A' && c<='F') + hex = (hex<<4) | (c-'A'+10); + else + break; + scan++; + left--; + } + length -= strl_t(scan-string); + string = scan; + return hex; +} + +// convert a binary string to an unsigned integer +unsigned int strref::abinarytoui_skip() +{ + skip_whitespace(); + const char *scan = string; + strl_t left = length; + if (!left) + return 0; + strl_t bin = 0; + while (left) { + unsigned char c = (unsigned char)*scan; + if (c<'0' || c>'1') + break; + scan++; + left--; + bin = (bin<<1) | (c-'0'); + } + length -= strl_t(scan-string); + string = scan; + return bin; +} +// convert a hexadecimal string to a signed integer +int strref::ahextoi() const +{ + const char *scan = string; + strl_t left = length; + while (*scan<=0x20 && left) { + scan++; + left--; + } + if (!left) + return 0; + bool neg = *scan=='-'; + if (neg) { + scan++; + left--; + } + if (left>2 && *scan=='0' && (scan[1]=='x' || scan[1]=='X')) { + scan += 2; + left -= 2; + } + strl_t hex = 0; + while (left) { + char c = *scan++; + left--; + if (c>='0' && c<='9') + hex = (hex<<4) | (c-'0'); + else if (c>='a' && c<='f') + hex = (hex<<4) | (c-'a'+10); + else if (c>='A' && c<='F') + hex = (hex<<4) | (c-'A'+10); + else + break; + } + return neg ? 
-(int)hex : (int)hex; +} + +// count instances of a character in a string +int strref::count_char(char c) const +{ + strl_t left = length; + const char *scan = string; + int count = 0; + while (left) { + if (*scan++ == c) + count++; + left--; + } + return count; +} + +// find a character in a string +static int int_find_char(char c, const char *scan, strl_t length) +{ + strl_t left = length; + while (left) { + if (*scan++ == c) + return length - left; + left--; + } + return -1; +} + +// find a character in a string after pos +int strref::find(char c) const +{ + if (!valid()) + return -1; + return int_find_char(c, string, length); +} + +// find an instance of a char after pos +int strref::find_after(char c, strl_t pos) const +{ + if (length>pos) { + int o = int_find_char(c, string + pos + 1, length - pos - 1); + if (o >= 0) + return o + pos + 1; + } + return -1; +} + +// find an instance of a char at pos or after +int strref::find_at(char c, strl_t pos) const +{ + if (length>pos) { + int o = int_find_char(c, string + pos, length - pos); + if (o >= 0) + return o + pos; + } + return -1; +} + +// find an instance of a char at pos or after or return full string +int strref::find_or_full(char c, strl_t pos) const +{ + if (!string) + return 0; + if (pos>=length) + return length; + int o = int_find_char(c, string + pos, length - pos); + if (o >= 0) + return o + pos; + return length; +} + +// ignore matches that are in escape codes +int strref::find_or_full_esc(char c, strl_t pos) const +{ + if (!string) + return 0; + if (pos>=length) + return length; + + strl_t left = length-pos; + const char *scan = string+pos; + while (left) { + char m = *scan++; + if (m=='\\' && left) { + scan++; + left--; + } else if (m==c) + return length-left; + left--; + } + return length; +} + +// find last position of character c +int strref::find_last(char c) const +{ + if (length && string) { + strl_t left = length; + const char *scan = string + left; + while (left) { + if (*--scan == c) + 
return left - 1; + left--; + } + } + return -1; +} + +// find first position of either c or d +int strref::find(char c, char d) const +{ + strl_t left = length; + if (const char *scan = string) { + while (left) { + char n = *scan++; + if (n == c || n == d) + return length - left; + left--; + } + } + return -1; +} + +// find last instance of either character c or d +int strref::find_last(char c, char d) const +{ + if (length && string) { + strl_t left = length - 1; + const char *scan = string + left; + while (left) { + char n = *--scan; + if (n == c || n == d) + return left - 1; + left--; + } + } + return -1; +} + +// compare a string with a substring case sensitive +static bool int_compare_substr_case(const char *scan, strl_t length, const char *check, strl_t chk_len) +{ + if (length < chk_len) + return false; + if (scan==nullptr || check==nullptr) + return scan==check; + for (strl_t cl = 0; cl length) + return false; + + return int_compare_substr(string + pos, length - pos, str.string, str.length); +} + +// allow escape codes in search string +bool strref::same_substr_esc(const strref str, strl_t pos) const { + if (pos >= length) + return false; + + const char *scan = string + pos; + const char *compare = str.string; + strl_t compare_left = str.length; + while (compare_left) { + unsigned char c = (unsigned char)*compare++; + compare_left--; + if (c=='\\' && compare_left) { + strl_t skip = int_get_esc_code(compare, compare_left, c); + compare += skip; + compare_left -= skip; + } + if (int_tolower_ascii7(*scan++) != int_tolower_ascii7(c)) + return false; + } + return true; +} + +// case sensitive substring compare +bool strref::same_substr_case(const strref str, strl_t pos) const { + if ((str.length+pos) > length) + return false; + + return int_compare_substr_case(string + pos, length - pos, str.string, str.length); +} + +// allow escape codes in search string +bool strref::same_substr_case_esc(const strref str, strl_t pos) const { + if (pos >= length) + return 
false; + + const char *scan = string + pos; + const char *compare = str.string; + strl_t compare_left = str.length; + while (compare_left) { + unsigned char c = (unsigned char)*compare++; + compare_left--; + if (c=='\\' && compare_left) { + strl_t skip = int_get_esc_code(compare, compare_left, c); + compare += skip; + compare_left -= skip; + } + if (*scan++ != c) + return false; + } + return true; +} + +// iterate over tokens to find a string that matches substr case ignored +strref strref::find_token(const char *substr, char token) const +{ + strref parse = *this; + while (strref chunk = parse.token_chunk(token)) { + if (chunk.same_str(substr)) + return chunk; + parse.token_skip(chunk); + } + return strref(); +} + +// iterate over tokens to find a string matching substr case ignored +strref strref::find_token(strref substr, char token) const +{ + strref parse = *this; + while (strref chunk = parse.token_chunk(token)) { + if (chunk.same_str(substr)) + return chunk; + parse.token_skip(chunk); + } + return strref(); +} + +// iterate over tokens to find a string matching substr case sensitive +strref strref::find_token_case(const char *substr, char token) const +{ + strref parse = *this; + while (strref chunk = parse.token_chunk(token)) { + if (chunk.same_str_case(substr)) + return chunk; + parse.token_skip(chunk); + } + return strref(); +} + +// iterate over tokens to find a string matching substr case sensitive +strref strref::find_token_case(strref substr, char token) const +{ + strref parse = *this; + while (strref chunk = parse.token_chunk(token)) { + if (chunk.same_str_case(substr)) + return chunk; + parse.token_skip(chunk); + } + return strref(); +} + +// determine if string is greater than other string +bool strref::operator>(const strref o) const +{ + const char *scan = string; + const char *compare = o.string; + strl_t left = length>o.length ? 
o.length : length; + while (left) { + char c1 = *scan++; + char c2 = *compare++; + if (c1>c2) + return true; + else if (c1o.length; +} + +// determine if string is lesser than other string +bool strref::operator<(const strref o) const +{ + const char *scan = string; + const char *compare = o.string; + strl_t left = length>o.length ? o.length : length; + while (left) { + char c1 = *scan++; + char c2 = *compare++; + if (c1c2) + return false; + left--; + } + return length=find_len) { + if (int_tolower_ascii7(*scan++)==c) { + if (int_compare_substr(scan, left - 1, compare, find_len - 1)) + return length-left; + } + left--; + } + return -1; +} + +// find a substring within a string case ignored +int strref::find_bookend(const strref str, const strref bookend) const +{ + if (!str.valid() || !valid() || length= find_len) { + char d = int_tolower_ascii7(*scan++); + if (d == c && (left == length || bookend.char_matches_ranges(p)) && + (left == find_len || bookend.char_matches_ranges(int_tolower_ascii7(scan[find_len-1])))) { + if (int_compare_substr(scan, left - 1, compare, find_len - 1)) + return length - left; + } + p = d; + left--; + } + return -1; +} +// find a substring within a string case ignored starting at pos +int strref::find(const strref str, strl_t pos) const +{ + if (!str.valid() || !valid() || length= find_len) { + if (int_tolower_ascii7(*scan++) == c) { + if (int_compare_substr(scan, left - 1, compare, find_len - 1)) + return length - left; + } + left--; + } + + return -1; +} + +// find case sensitive allow escape codes (\x => x) in search string +int strref::find_esc(const strref str, strl_t pos) const +{ + if (!str.valid() || !valid() || pos>=length) + return -1; + + // start scan buffer pointers + const char *scan = string + pos; + const char *compare = str.string; + + // number of characters left in each buffer + strl_t scan_left = length - pos; + strl_t compare_left = str.length; + + // get first character + unsigned char c = (unsigned char)*compare++; + 
compare_left--; + if (c=='\\' && compare_left) { + strl_t skip = int_get_esc_code(compare, compare_left, c); + compare += skip; + compare_left -= skip; + } + + // sweep the scan buffer for the matching string + while (scan_left) { + if (*scan++ == c) { + const char *chk_scan = scan; + const char *chk_compare = compare; + strl_t chk_scan_left = scan_left; + strl_t chk_compare_left = compare_left; + while (chk_compare_left) { + unsigned char d = (unsigned char)*chk_compare++; + chk_compare_left--; + if (d=='\\' && compare_left) { + strl_t skip = int_get_esc_code(compare, compare_left, d); + compare += skip; + compare_left -= skip; + } + if (!chk_scan_left || int_tolower_ascii7(*chk_scan++)!=int_tolower_ascii7(d)) { + chk_compare_left = 1; + break; + } + chk_scan_left--; + } + if (!chk_compare_left) + return length - scan_left; + } + scan_left--; + } + return -1; +} + +// find a substring within a string case ignored +int strref::find(const char *str) const +{ + if (!str || !valid()) + return -1; + + char c = int_tolower_ascii7(*str++); + if (!c) + return 0; + + const char *scan = string; + const char *compare = str; + + strl_t l = length; + while (l) { + if (int_tolower_ascii7(*scan++)==c) { + bool equal = true; + const char *scan_chk = scan; + while (char c2 = *compare++) { + if (int_tolower_ascii7(*scan_chk++)!=int_tolower_ascii7(c2)) { + compare = str; + equal = false; + break; + } + } + if (equal) + return length-l; + } + l--; + } + return -1; +} + +// find a substring within a string case sensitive +int strref::find_case(const strref str, strl_t pos) const +{ + if (!str.valid() || !valid() || length=length) + return -1; + + const char *scan = string + pos; + const char *compare = str.string, *compare_chk = compare; + strl_t left2 = str.length - pos; + + strl_t left = length; + while (left>=left2) { + if (*scan++==*compare_chk) { + const char *scan_chk = scan; + compare_chk++; + while (--left2) { + if (*scan_chk++!=*compare_chk++) { + compare_chk = compare; + 
left2 = str.length; + break; + } + } + if (!left2) + return length-left; + } + left--; + } + return -1; +} + +// find case sensitive allow escape codes (\x => x) in search string +int strref::find_case_esc(const strref str, strl_t pos) const +{ + if (!str.valid() || !valid() || pos>=length) + return -1; + + // start scan buffer pointers + const char *scan = string + pos; + const char *compare = str.string; + + // number of characters left in each buffer + strl_t scan_left = length - pos; + strl_t compare_left = str.length; + + // get first character + unsigned char c = (unsigned char)*compare++; + compare_left--; + if (c=='\\' && compare_left) { + strl_t skip = int_get_esc_code(compare, compare_left, c); + compare += skip; + compare_left -= skip; + } + + // sweep the scan buffer for the matching string + while (scan_left) { + if (*scan++ == c) { + const char *chk_scan = scan; + const char *chk_compare = compare; + strl_t chk_scan_left = scan_left; + strl_t chk_compare_left = compare_left; + while (chk_compare_left) { + unsigned char d = (unsigned char)*chk_compare++; + chk_compare_left--; + if (d=='\\' && compare_left) { + strl_t skip = int_get_esc_code(compare, compare_left, d); + compare += skip; + compare_left -= skip; + } + if (!chk_scan_left || *chk_scan++!=d) { + chk_compare_left = 1; + break; + } + chk_scan_left--; + } + if (!chk_compare_left) + return length - scan_left; + } + scan_left--; + } + return -1; +} + +// checks if a range is an exclusion +static strref int_check_exclude(strref range, bool &include) +{ + const char *rng = range.get(); + strl_t rng_left = range.get_len(); + if (rng_left && *rng == '!') { + include = false; + return range + 1; + } + include = true; + return range; +} + +// checks if character c matches range +static bool int_char_match_range_case(char c, const char *rng_chk, strl_t rng_lft) +{ + // no match yet, check skipped character against allowed range + bool match = false; + while (rng_lft) { + unsigned char m = (unsigned 
char)*rng_chk++; + rng_lft--; + // escape code? + if (m == '\\' && rng_lft) { + strl_t skip = int_get_esc_code(rng_chk, rng_lft, m); + rng_chk += skip; + rng_lft -= skip; + } + // range? + if (rng_lft>1 && *rng_chk == '-') { + rng_chk++; + rng_lft--; + unsigned char n = (unsigned char)*rng_chk++; + rng_lft--; + // escape code for range end? + if (n == '\\' && rng_lft) { + strl_t skip = int_get_esc_code(rng_chk, rng_lft, n); + rng_chk += skip; + rng_lft -= skip; + } + if (c >= m && c <= n) { + match = true; + break; + } + } + else if (c == m) { + match = true; + break; + } + } + return match; +} + +// checks if character c matches range +static bool int_char_match_range(char c, const char *rng_chk, strl_t rng_lft) +{ + // no match yet, check skipped character against allowed range + bool match = false; + while (rng_lft) { + unsigned char m = (unsigned char)*rng_chk++; + rng_lft--; + // escape code? + if (m == '\\' && rng_lft) { + strl_t skip = int_get_esc_code(rng_chk, rng_lft, m); + rng_chk += skip; + rng_lft -= skip; + } + // range? + if (rng_lft>1 && *rng_chk == '-') { + rng_chk++; + rng_lft--; + unsigned char n = (unsigned char)*rng_chk++; + rng_lft--; + // escape code for range end? 
+ if (n == '\\' && rng_lft) { + strl_t skip = int_get_esc_code(rng_chk, rng_lft, n); + rng_chk += skip; + rng_lft -= skip; + } + if (c >= int_tolower_ascii7(m) && c <= int_tolower_ascii7(n)) { + match = true; + break; + } + } + else if (c == int_tolower_ascii7(m)) { + match = true; + break; + } + } + return match; +} + +// find case sensitive allow escape codes (\x => x) in search string +int strref::find_case_esc_range(const strref str, const strref range, strl_t pos) const +{ + if (!str.valid() || !valid() || pos>=get_len() || !range.get_len()) + return -1; + + // start scan buffer pointers + const char *scan = string + pos; + const char *compare = str.string; + + // number of characters left in each buffer + strl_t scan_left = length - pos; + strl_t compare_left = str.length; + + // get first character + unsigned char c = (unsigned char)*compare++; + compare_left--; + if (c=='\\' && compare_left) { + strl_t skip = int_get_esc_code(compare, compare_left, c); + compare += skip; + compare_left -= skip; + } + + // check if range is inclusive or exclusive + bool include; + strref rng = int_check_exclude(range, include); + + // sweep the scan buffer for the matching string + while (scan_left) { + unsigned char b = (unsigned char)*scan++; + // check for string match + if (b == c) { + const char *chk_scan = scan; + const char *chk_compare = compare; + strl_t chk_scan_left = scan_left; + strl_t chk_compare_left = compare_left; + while (chk_compare_left) { + unsigned char d = (unsigned char)*chk_compare++; + chk_compare_left--; + unsigned char c = (unsigned char)*chk_scan++; + chk_scan_left--; + if (d=='\\' && compare_left) { + strl_t skip = int_get_esc_code(compare, compare_left, d); + compare += skip; + compare_left -= skip; + } + if (c!=d) { + chk_compare_left = 1; + break; + } + } + if (!chk_compare_left) + return length - scan_left; + } + + // no match yet, check character against range + // check if character is allowed + bool match = int_char_match_range_case(b, 
rng.get(), rng.get_len()); + if ((match && !include) || (!match && include)) + return -1; + + scan_left--; + } + return -1; +} + +// find substring, allow escape codes (\x => x) in search string +int strref::find_esc_range(const strref str, const strref range, strl_t pos) const +{ + if (!str.valid() || !valid() || pos>=get_len() || !range.get_len()) + return -1; + + // start scan buffer pointers + const char *scan = string + pos; + const char *compare = str.string; + + // number of characters left in each buffer + strl_t scan_left = length - pos; + strl_t compare_left = str.length; + + // get first character + unsigned char c = (unsigned char)*compare++; + compare_left--; + if (c=='\\' && compare_left) { + strl_t skip = int_get_esc_code(compare, compare_left, c); + compare += skip; + compare_left -= skip; + } + + // check if range is inclusive or exclusive + bool include; + strref rng = int_check_exclude(range, include); + + // sweep the scan buffer for the matching string + while (scan_left) { + unsigned char b = int_tolower_ascii7(*scan++); + // check for string match + if (b == c) { + const char *chk_scan = scan; + const char *chk_compare = compare; + strl_t chk_scan_left = scan_left; + strl_t chk_compare_left = compare_left; + while (chk_compare_left) { + unsigned char d = (unsigned char)int_tolower_ascii7(*chk_compare++); + chk_compare_left--; + unsigned char c = (unsigned char)int_tolower_ascii7(*chk_scan++); + chk_scan_left--; + if (d=='\\' && compare_left) { + strl_t skip = int_get_esc_code(compare, compare_left, d); + compare += skip; + compare_left -= skip; + } + if (c!=d) { + chk_compare_left = 1; + break; + } + } + if (!chk_compare_left) + return length - scan_left; + } + + // no match yet, check character against range + bool match = int_char_match_range(b, rng.get(), rng.get_len()); + + // check if character is allowed + if ((match && !include) || (!match && include)) + return -1; + + scan_left--; + } + return -1; +} + +// find a substring within a 
string case sensitive +int strref::find_case(const char *str) const +{ + if (!str || !valid()) + return -1; + + char c = *str++; + if (!c) + return 0; + + const char *scan = string; + const char *compare = str; + + strl_t left = length; + while (left) { + if (*scan++==c) { + bool equal = true; + const char *pb = scan; + while (char c2 = *compare++) { + if (*pb++!=c2) { + compare = str; + equal = false; + break; + } + } + if (equal) + return length-left; + } + left--; + } + return -1; +} + +// find last matching substring within a string case ignored +int strref::find_last(const strref str) const +{ + if (!str.valid() || !valid() || length0) { + left--; + if (int_tolower_ascii7(*--scan)==c) { + const char *scan_chk = scan; + const char *cmp_chk = compare; + strl_t left_check = str.length; + while (--left_check) { + if (int_tolower_ascii7(*--scan_chk)!=int_tolower_ascii7(*--cmp_chk)) { + left_check = 1; + break; + } + } + if (!left_check) + return left-str.length+1; + } + } + return -1; +} + +// find last matching substring within a string case ignored +int strref::find_last_bookend(const strref str, const strref bookend) const +{ + if (!str.valid() || !valid() || length0) { + left--; + char d = int_tolower_ascii7(*--scan); + if (d == c && (left==length || bookend.char_matches_ranges(p))) { + const char *scan_chk = scan; + const char *cmp_chk = compare; + strl_t left_check = str.length; + while (--left_check) { + if (int_tolower_ascii7(*--scan_chk) != int_tolower_ascii7(*--cmp_chk)) { + left_check = 1; + break; + } + } + if (!left_check) { + if (string == scan_chk || bookend.char_matches_ranges(int_tolower_ascii7(*--cmp_chk))) + return left - str.length + 1; + } + } + p = d; + } + return -1; +} + +// find last matching substring within a string case ignored +int strref::find_last(const char *str) const +{ + if (!str || !*str || !valid()) + return -1; + + const char *scan = string+length; + const char *compare = str + strlen(str); + unsigned char c = 
int_tolower_ascii7(*--compare); + int l = length; + while (l>=0) { + l--; + if (int_tolower_ascii7(*--scan)==c) { + const char *scan_chk = scan; + const char *cmp_chk = compare; + while (cmp_chk>str) { + if (int_tolower_ascii7(*--scan_chk)!=int_tolower_ascii7(*--cmp_chk)) { + cmp_chk = compare; + break; + } + } + if (cmp_chk==str) + return int(scan_chk-string); + } + } + return -1; +} + +// find last matching substring within a string case sensitive +int strref::find_last_case(const strref str) const +{ + if (!str.valid() || !valid() || length=0) { + if (*--scan==*compare_chk) { + const char *scan_chk = scan; + while (--left_chk) { + if (*--scan_chk!=*--compare_chk) { + compare_chk = compare; + left_chk = str.length; + break; + } + } + if (!left_chk) + return length-left; + } + left--; + } + return -1; +} + +// count number of matching substrings in string +int strref::substr_count(const strref str) const +{ + if (!str.valid() || !valid() || length=substrlen) { + while (left && int_tolower_ascii7(*scan++)!=c) + left--; + if (left && left>=substrlen) { + // first character matches and enough characters remain for a potential match + const char *compare = str.string+1; + strl_t sr = substrlen-1; + const char *scan_chk = scan; + while (sr && int_tolower_ascii7(*compare++)==int_tolower_ascii7(*scan_chk++)) + sr--; + if (sr==0) { + scan = scan_chk; + left -= substrlen-1; + count++; + } + } + } + return count; +} + +// count number of matching substrings in string +int strref::substr_count_bookend(const strref str, const strref bookend) const +{ + if (!str.valid() || !valid() || length= substrlen) { + + while (left) { + char d = int_tolower_ascii7(*scan++); + if (d == c && bookend.char_matches_ranges(p)) + break; + p = d; + left--; + } + if (left && left >= substrlen) { + // first character matches and enough characters remain for a potential match + const char *compare = str.string + 1; + strl_t sr = substrlen - 1; + const char *scan_chk = scan; + while (sr && 
int_tolower_ascii7(*compare++) == int_tolower_ascii7(*scan_chk++)) + sr--; + if (sr == 0 && (scan_chk == (string + length) || bookend.char_matches_ranges(int_tolower_ascii7(*scan_chk++)))) { + scan = scan_chk; + left -= substrlen - 1; + count++; + } + } + } + return count; +} + +// count number of matching substrings in string +int strref::substr_case_count(const strref str) const +{ + if (!str.valid() || !valid() || length=substrlen) { + while (left && *scan++!=c) + left--; + if (left && left>=substrlen) { + // first character matches and enough characters remain for a potential match + const char *compare = str.string+1; + strl_t sr = substrlen-1; + const char *scan_chk = scan; + while (sr && *compare++==*scan_chk++) + sr--; + if (sr==0) { + scan = scan_chk; + left -= substrlen-1; + count++; + } + } + } + return count; +} + +// count number of matching substrings that are bounded by separators and case sensitive in string +int strref::substr_label_case_count(const strref str) const +{ + if (!str.valid() || !valid() || length=substrlen) { + while (left) { + char d = *scan++; + if (d==c) + break; + left--; + p = d; + } + if (!is_valid_label(p) && left && left>=substrlen) { + // first character matches and enough characters remain for a potential match + const char *compare = str.string+1; + strl_t sr = substrlen-1; + const char *scan_chk = scan; + while (sr && *compare++==*scan_chk++) + sr--; + if (sr==0) { + if (!left || !is_valid_label(*scan_chk)) { + scan = scan_chk; + left -= substrlen-1; + count++; + } + } + } + } + return count; +} + +// count how many times character c repeats at pos +int strref::count_repeat(char c, strl_t pos) const { + if (pos>=length) + return 0; + const char *scan = string + pos; + strl_t left = length - pos; + int count = 0; + while (left) { + if (*scan++ != c) + return count; + left--; + count++; + } + return count; +} + +// count how many time a character repeats backwards at pos +int strref::count_repeat_reverse(char c, strl_t pos) 
const { + if (pos>=length) + return 0; + const char *scan = string + pos; + strl_t left = pos; + int count = 0; + while (left) { + if (*scan-- != c) + return count; + left--; + count++; + } + return count; +} + +// count number of lines with any line ending standard +int strref::count_lines() const { + const char *scan = string; + strl_t left = length; + int count = 0; + while (left) { + char c = *scan++; + left--; + if (c==0x0a || c==0x0d) { + count++; + if (left && ((c==0x0a && *scan==0x0d) || (c==0x0d && *scan==0x0a))) { + scan++; + left--; + } + } + } + return count; +} + + +// find any char from str in this string at position +// (does not check for escape codes or ranges) +int strref::find_any_char_of(const strref range, strl_t pos) const { + if (pos>=length) + return -1; + + const char *scan = get() + pos; + strl_t left = length-pos; + + const char *rng = range.get(); + strl_t count = range.get_len(); + + while (left) { + char c = *scan++; + const char *rng_chk = rng; + for (strl_t n = count; n; --n) { + if (c == *rng_chk++) + return length-left; + } + left--; + } + return -1; +} + +static int int_find_range(const char *scan, strl_t left, strl_t length, strref rng, bool include) +{ + while (left) { + unsigned char c = (unsigned char)*scan++; + bool match = int_char_match_range_case(c, rng.get(), rng.get_len()); + if ((match && include) || (!match && !include)) + return length - left; + left--; + } + return -1; +} + +// find a character matching a range, allow a range of characters using '-' +// such as a-fq0-5 == abcdefq012345 and prefix ! 
to exclude +int strref::find_any_char_or_range(const strref range, strl_t pos) const { + if (pos>=length) + return -1; + + bool include; + strref rng = int_check_exclude(range, include); + + return int_find_range(string+pos, length-pos, length, rng, include); +} + +// find a word made out of characters in the given range +strref strref::get_range_word(const strref range, strl_t pos) const +{ + if (pos >= length) + return strref(); + + bool include; + strref rng = int_check_exclude(range, include); + + return get_substr(0, int_find_range(string + pos, length - pos, length, rng, !include)); +} + +int strref::find_any_not_in_range(const strref range, strl_t pos) const { + if (pos>=length) + return -1; + + if (pos>=length) + return -1; + + bool include; + strref rng = int_check_exclude(range, include); + + return int_find_range(string+pos, length-pos, length, rng, !include); +} +// search of a character in a given range while also checking that +// skipped characters are in another given range. 
+int strref::find_range_char_within_range(const strref range_find, const strref range_within, strl_t pos) const { + if (pos>=length) + return -1; + + const char *scan = get() + pos; + strl_t l = length-pos; + + // check if range is inclusive or exclusive + bool include_find; + strref rng_f = int_check_exclude(range_find, include_find); + + bool include_within; + strref rng_w = int_check_exclude(range_within, include_within); + + while (l) { + unsigned char c = (unsigned char)*scan++; + bool match_find = int_char_match_range_case(c, rng_f.get(), rng_f.get_len()); + if ((match_find && include_find) || (!match_find && !include_find)) + return length - l; + + // no match yet, check skipped character against allowed range + bool match = int_char_match_range_case(c, rng_w.get(), rng_w.get_len()); + + // check if character is allowed + if ((match && !include_within) || (!match && include_within)) + return -1; + l--; + } + return -1; +} + +// check if character matches a given range (this) +bool strref::char_matches_ranges(unsigned char c) const { + // check if range is inclusive or exclusive + bool include; + strref rng = int_check_exclude(*this, include); + bool match = int_char_match_range_case(c, rng.get(), rng.get_len()); + return (match && include) || (!match && !include); +} + +// wildcard search + +#define MAX_WILDCARD_SEGMENTS 64 +#define MAX_WILDCARD_STEPS 48 +#define MAX_WILDCARD_SEARCH_STACK 32 +enum WILDCARD_SEGMENT_TYPE { + WCST_END, + WCST_FIND_SUBSTR, + WCST_FIND_SUBSTR_RANGE, + WCST_FIND_RANGE_CHAR, + WCST_FIND_RANGE_CHAR_RANGED, + WCST_FIND_WORD_START, + WCST_FIND_WORD_START_RANGED, + WCST_FIND_WORD_END, + WCST_FIND_WORD_END_RANGED, + WCST_FIND_LINE_START, + WCST_FIND_LINE_START_RANGED, + WCST_FIND_LINE_END, + WCST_FIND_LINE_END_RANGED, + WCST_NEXT_SUBSTR, + WCST_NEXT_ANY_CHAR, + WCST_NEXT_RANGE_CHAR, + WCST_NEXT_WORD_START, + WCST_NEXT_WORD_END, + WCST_NEXT_LINE_START, + WCST_NEXT_LINE_END, +}; + +// ? 
=> any single +// # => any single number +// [] => any single between the brackets +// [-] => any single in the range from character before - to character after +// [!] => any single not between the brackets +// < => start of word +// > => end of word +// @ => start of line +// ^ => end of line +// * => any substring +// *% => any substring excluding whitespace +// *@ => any substring on same line +// *$ => any substring containing alphanumeric ascii characters +// *{} => any substring only containing characters between parenthesis +// *{!} => any substring not containing characters between parenthesis +// \?, \[, \*, etc. => search for character after backslash +// \n, \t, etc. => search for linefeed, tab etc. +// +// words are groups of letters not containing whitespace or separators +// which are alphanumeric characters plus apostrophe (') + +// predefined ranges for wildcard filters +static const strref _no_whitespace_range("!\01- "); +static const strref _no_enter_range("!\n\r"); +static const strref _enter_range("\n\r"); +static const strref _alphanumeric_range("0-9A-Za-z"); +static const strref _numeric_range("0-9"); +static const strref _wildcard_control("*?#[<>@^"); + +// convert wildcard to a set of search steps +static int _build_wildcard_steps(const strref wild, strref *segs, char *type, int &segments) { + int numSeg = 0; + int numType = 0; + + // segment separators are: *, ?, [, < + int pos = 0, last = 0; // current position, last evaluated position + bool search = true; // in search mode + strref range; + for (;;) { + // check for number of search segments / number of search steps overflow + if (numSeg > (MAX_WILDCARD_SEGMENTS-4) || numType > (MAX_WILDCARD_STEPS-2)) + return strref(); + int next_pos = wild.find_any_char_of(_wildcard_control, pos); + if (next_pos<0) { // completed? 
(found no wildcard token) + // add last segment if there was one + if (wild.get_len() >(strl_t)last) { + segs[numSeg++] = wild.get_substr(last, wild.get_len()-last); + if (search && range) + segs[numSeg++] = range; + type[numType++] = search ? (range ? WCST_FIND_SUBSTR_RANGE : WCST_FIND_SUBSTR) : WCST_NEXT_SUBSTR; + } + range.clear(); + break; + } + switch (wild.get_at(next_pos)) { + case '*': // * => any substring with optional filter + if (next_pos > last) { + segs[numSeg++] = wild.get_substr(last, next_pos-last); + if (search && range) + segs[numSeg++] = range; + type[numType++] = search ? (range ? WCST_FIND_SUBSTR_RANGE : WCST_FIND_SUBSTR) : WCST_NEXT_SUBSTR; + } + last = pos = next_pos+1; + range.clear(); + // check for substring character filter + if (strl_t(pos) < wild.get_len()) { + switch (wild[pos]) { + case '{': { // user defined character filter + int range_end = wild.find_after('}', pos); + if (range_end > 0) { + range = wild.get_substr(pos+1, range_end-pos); + last = pos = range_end + 1; + } else + pos++; + break; + } + case '%': // % => no whitespaces + range = _no_whitespace_range; + last = ++pos; + break; + case '@': // @ => no line break + range = _no_enter_range; + last = ++pos; + break; + case '$': // $ => only alphanumeric characters + range = _alphanumeric_range; + last = ++pos; + break; + } + } + search = true; + break; + + case '<': // < = first character of word + if (next_pos > last) { + segs[numSeg++] = wild.get_substr(last, next_pos-last); + if (search && range) + segs[numSeg++] = range; + type[numType++] = search ? (range ? WCST_FIND_SUBSTR_RANGE : WCST_FIND_SUBSTR) : WCST_NEXT_SUBSTR; + search = false; + range.clear(); + } + if (search && range) + segs[numSeg++] = range; + type[numType++] = search ? (range ? 
WCST_FIND_WORD_START_RANGED : WCST_FIND_WORD_START) : WCST_NEXT_WORD_START; + search = false; + range.clear(); + pos = last = next_pos+1; + break; + + case '>': // > = first character after word + if (next_pos > last) { + segs[numSeg++] = wild.get_substr(last, next_pos-last); + if (search && range) + segs[numSeg++] = range; + type[numType++] = search ? (range ? WCST_FIND_SUBSTR_RANGE : WCST_FIND_SUBSTR) : WCST_NEXT_SUBSTR; + search = false; + range.clear(); + } + if (search && range) + segs[numSeg++] = range; + type[numType++] = search ? (range ? WCST_FIND_WORD_END_RANGED : WCST_FIND_WORD_END) : WCST_NEXT_WORD_END; + search = false; + range.clear(); + pos = last = next_pos+1; + break; + + case '@': // < = first character of line + if (next_pos > last) { + segs[numSeg++] = wild.get_substr(last, next_pos-last); + if (search && range) + segs[numSeg++] = range; + type[numType++] = search ? (range ? WCST_FIND_SUBSTR_RANGE : WCST_FIND_SUBSTR) : WCST_NEXT_SUBSTR; + search = false; + range.clear(); + } + type[numType++] = search ? (range ? WCST_FIND_LINE_START_RANGED : WCST_FIND_LINE_START) : WCST_NEXT_LINE_START; + search = false; + range.clear(); + pos = last = next_pos+1; + break; + + case '^': // > = first character after line + if (next_pos > last) { + segs[numSeg++] = wild.get_substr(last, next_pos-last); + if (search && range) + segs[numSeg++] = range; + type[numType++] = search ? (range ? WCST_FIND_SUBSTR_RANGE : WCST_FIND_SUBSTR) : WCST_NEXT_SUBSTR; + search = false; + range.clear(); + } + if (search && range) + segs[numSeg++] = range; + type[numType++] = search ? (range ? WCST_FIND_LINE_END_RANGED : WCST_FIND_LINE_END) : WCST_NEXT_LINE_END; + search = false; + range.clear(); + pos = last = next_pos+1; + break; + + case '?': // ? 
= any character + // any character is redundant if currently searching + if (!search) { + if (next_pos > last) { + segs[numSeg++] = wild.get_substr(last, next_pos-last); + if (search && range) + segs[numSeg++] = range; + type[numType++] = search ? (range ? WCST_FIND_SUBSTR_RANGE : WCST_FIND_SUBSTR) : WCST_NEXT_SUBSTR; + } + range.clear(); + type[numType++] = WCST_NEXT_ANY_CHAR; + search = false; + range.clear(); + pos = last = next_pos+1; + } else + last = ++pos; + break; + + case '#': // # = any number (hard coded range) + if (next_pos > last) { + segs[numSeg++] = wild.get_substr(last, next_pos-last); + type[numType++] = search ? WCST_FIND_SUBSTR : WCST_NEXT_SUBSTR; + search = false; + } + segs[numSeg++] = _numeric_range; + type[numType++] = search ? WCST_FIND_RANGE_CHAR : WCST_NEXT_RANGE_CHAR; + search = false; + pos = last = next_pos+1; + break; + + case '[': { // [..] = limited range character + int close_pos = wild.find_after(']', next_pos+1); + if (close_pos>=1) { + if (next_pos > last) { + segs[numSeg++] = wild.get_substr(last, next_pos-last); + if (search && range) + segs[numSeg++] = range; + type[numType++] = search ? (range ? WCST_FIND_SUBSTR_RANGE : WCST_FIND_SUBSTR) : WCST_NEXT_SUBSTR; + search = false; + range.clear(); + } + segs[numSeg++] = wild.get_substr(next_pos+1, close_pos-next_pos-1); + if (search && range) + segs[numSeg++] = range; + type[numType++] = search ? (range ? 
WCST_FIND_RANGE_CHAR_RANGED : WCST_FIND_RANGE_CHAR) : WCST_NEXT_RANGE_CHAR; + search = false; + range.clear(); + pos = last = close_pos+1; + } else + pos = next_pos+1; + break; + } + } + } + type[numType++] = WCST_END; + segments = numSeg; + return numType; +} + +// search for a substring with wildcard rules +strref strref::find_wildcard(const strref wild, strl_t start, bool case_sensitive) const +{ + // collection of sub segments from wildcard + strref segs[MAX_WILDCARD_SEGMENTS]; + char type[MAX_WILDCARD_STEPS]; + int numSeg = 0; + + // Convert the wildcard to a set of sequential searches and matches + int numType = _build_wildcard_steps(wild, segs, type, numSeg); + + // start going through the steps to find a match + int pos = start; + char last_valid_search_step[MAX_WILDCARD_SEARCH_STACK]; + char last_valid_search_seg[MAX_WILDCARD_SEARCH_STACK]; + strl_t last_valid_search_pos[MAX_WILDCARD_SEARCH_STACK]; + while ((strl_t)pos < length) { + int first_pos = pos; + int seg = 0; + bool valid = true; + int last_valid_stack = 0; + int step = 0; + while (step=length) { + valid = false; + break; + } + } + found_pos = pos; + break; + + case WCST_FIND_LINE_START_RANGED: + find = true; + // current position may be ok if first pos or prev=line sep + // skip if: current = separator or previous is not separator + if (pos && string[pos-1]!=0xa && string[pos-1]!=0x0d) { + while (strl_t(pos)=length) { + valid = false; + break; + } + } + found_pos = pos; + seg++; + break; + + case WCST_NEXT_LINE_END: + if (strl_t(pos)5) + left = 5; + unsigned char f = *scan++; + unsigned int c = f, mask = 0x80; + while ((mask&c) && left) { + unsigned char n = *scan++; + c = (c<<6)|(n&0x3f); + mask <<= 5; + left--; + } + return c; +} + +// read one utf8 from the start of a string and move string +// move string forward by the size of the code. 
+unsigned int strref::pop_utf8() +{ + if (!valid()) +return 0; +const char *scan = string; +strl_t left = length-1; +if (left>5) +left = 5; +unsigned char f = *scan++; +unsigned int c = f, m = 0x80; +while ((m&c) && left) { + unsigned char n = *scan++; + c = (c<<6)|(n&0x3f); + m <<= 5; + left--; +} +length -= strl_t(scan-string); +string = scan; +return c; +} + +bool strref::valid_ascii7() const +{ + const char *scan = get(); + size_t left = get_len(); + while (left) { + char c = *scan++; + if ((c<' ' || c>=127) && c!=0x0a && c!=0x0d && c!=0x09) + return false; + left--; + } + return true; +} + +// find the character d outside of quoted xml text +int strref::find_quoted_xml(char d) const +{ + const char *scan = string; + strl_t left = length; + char q = 0; // quote type is either " or ' + while (left) { + char c = *scan++; + if (q) { + if (c==q) + q = 0; + } else if ((c=='"' || c=='\'')) + q = c; + else if (c==d) + return length-left; + --left; + } + return -1; +} + +// if this string begins as an xml quote return that. 
+strref strref::get_quote_xml() const +{ + char quote_char = get_first(); + if (quote_char!='"' && quote_char!='\'') + return strref(); + + const char *scan = string+1; + strl_t left = length-1; + while (left) { + char c = *scan++; + if (c==quote_char) + return strref(string+1, length-left-1); + --left; + } + return strref(); +} + +// find the character d outside of a quote +int strref::find_quoted(char d) const +{ + strl_t left = length; + const char *scan = string; + char quote_char = 0; + char previous_char = 0; + while (left) { + char c = *scan++; + if (quote_char) { + if (c==quote_char && previous_char!='\\') + quote_char = 0; + } else if (c=='"' || c=='\'') + quote_char = c; + else if (c==d) + return length-left; + --left; + } + return -1; +} + +// grab a block of text starting with (, [ or { and end with the corresponding number of ), ] or } +strref strref::scoped_block_skip() +{ + char scope = get_first(); + if (length && (scope == '(' || scope == '[' || scope == '{')) { + char close = scope=='(' ? ')' : (scope=='[' ? ']' : '}'); + const char *scan = string; + strl_t depth = 0; + strl_t left = length; + do { + char c = *scan++; + if (c==scope) + depth++; + else if (c==close) + depth--; + } while (depth && left); + if (!depth) { + strref block = strref(string+1, strl_t(scan-string-2)); + length -= strl_t(scan-string); + string = scan; + return block; + } + } + return strref(); +} + + +// return the current line of text and move this string ahead to the next. +// note: supports all known line feed configurations. 
+strref strref::next_line() +{ + const char *start = string; + const char *scan = start; + strl_t left = length; // if not valid left=0 and no characters will be interpreted + strref ret; + while (left && *scan!=0x0a && *scan!=0x0d) { + scan++; + left--; + } + // this is the line to return + ret = strref(start, strl_t(scan-start)); + if (left) { + char c = *scan++; + left--; + if (left && ((c==0x0a && *scan==0x0d) || (c==0x0d && *scan==0x0a))) { + scan++; + left--; + } + } + if (left) { + string = scan; + length = left; + } else { + string = nullptr; + length = 0; + } + return ret; +} + +// get line from current string +strref strref::get_line() const +{ + const char *start = string; + const char *scan = start; + strl_t left = length; // if not valid left=0 and no characters will be interpreted + strref ret; + while (left && *scan!=0x0a && *scan!=0x0d) { + scan++; + left--; + } + // this is the line to return + return strref(start, strl_t(scan-start)); +} + +// get a specific line number (0 indexed) +strref strref::get_line(strl_t line_num) const { + strref scan(*this); + while (scan) { + strref line = scan.next_line(); + if (!line_num) + return line; + line_num--; + } + return strref(); +} + +// determine how many characters can be used for floating point +strl_t strref::len_float_number() const +{ + // skip whitespace + const char *scan = string; + strl_t left = length; + + // valid check + if (scan==nullptr || left==0) + return 0; + + // not a floating point if just spaces and a dot + bool has_value = false; + + // include whitespace + strl_t ws = len_whitespace(); + scan += ws; + left -= ws; + + // include sign + if (left && (*scan=='-' || *scan=='+')) { + scan++; + left--; + if (!left || !is_number(*scan)) + return 0; + } + + // integer portion + while (left && is_number(*scan)) { + scan++; + left--; + has_value = true; + } + + // decimal + if (left && *scan=='.') { + scan++; + left--; + } + + // fraction + while (left && is_number(*scan)) { + scan++; + 
left--; + has_value = true; + } + + // exponent + if (left && (*scan=='e' || *scan=='E')) { + strl_t e = left; + scan++; + left--; + if (left && (*scan=='-' || *scan=='+')) { + scan++; + left--; + } + if (!left || !is_number(*scan)) + return length-e; + while (left && is_number(*scan)) { + scan++; + left--; + has_value = true; // e-10 is a fine floating point number + } + } + + // return size of floating point number + return has_value ? length-left : 0; +} + +// insert a substring into a string +strl_t _strmod_insert(char *string, strl_t length, strl_t cap, const strref sub, strl_t pos) +{ + if (pos>length) + return 0; + + if (sub.get_len()==0) + return 0; + + strl_t ins = sub.get_len(); + strl_t end = length; + strl_t last = ins+end; + if (last>cap) { + if (ins+pos>cap) { + if (ins>cap) + ins = 0; + else + ins = cap-pos; + } + } else { + const char *src = string+end; + char *dst = string+ins+end; + strl_t move = length-pos; + for (; move; move--) + *--dst = *--src; + } + const char *src = sub.get(); + char *dst = string + pos; + const char *e = string + cap; + strl_t left = sub.get_len(); + while (left && dst < e) { + unsigned char c = (unsigned char)*src++; + left--; + *dst++ = c; + } + + return ins + length; +} + +// determine the size of this string with evaluated escape codes +static strl_t int_string_size_esc(const char *string, strl_t length) +{ + strl_t size = 0; + while (length) { + unsigned char c = (unsigned char)*string++; + length--; + if (c=='\\' && length) { + strl_t skip = int_get_esc_code(string, length, c); + string += skip; + length -= skip; + } + size++; + } + return size; +} + +// insert a substring into a string allowing for escape codes +strl_t _strmod_insert_esc(char *string, strl_t length, strl_t cap, const strref sub, strl_t pos) +{ + if (pos>length) + return 0; + + if (sub.get_len()==0) + return 0; + + strl_t ins = int_string_size_esc(sub.get(), sub.get_len()); + strl_t end = length; + strl_t last = ins+end; + if (last>cap) { + if 
((ins+pos)>cap) { + if (pos>cap) + ins = 0; + else + ins = cap-pos; + } + } else { + const char *src = string+end; + char *dst = string+ins+end; + strl_t move = length-pos; + for (; move; move--) + *--dst = *--src; + } + const char *src = sub.get(); + char *dst = string + pos; + const char *e = string + cap; + strl_t left = sub.get_len(); + while (left && dst < e) { + unsigned char c = (unsigned char)*src++; + left--; + if (c=='\\' && left) { + strl_t skip = int_get_esc_code(src, left, c); + src += skip; + left -= skip; + } + *dst++ = c; + } + + return ins + length; +} + +// insert substrings by {n} notation +strl_t _strmod_format_insert(char *string, strl_t length, strl_t cap, strl_t pos, + strref format, const strref *args) { + // insert many things.. + + // can't insert at a position that is beyond the current size. + if (pos > length) + return length; + + while (format) { + // scan for '{' + int ins = format.find_or_full_esc('{', 0); + int close = format.find_after('}', ins); + if (close<0) { + ins = format.get_len(); + } + + // insert block before '{' + if (ins > 0) { + strl_t prev = length; + length = _strmod_insert_esc(string, length, cap, format.get_clipped(ins), pos); + pos += length - prev; + format += ins; + } + + // if there was a {} process that.. 
+ if (format.get_first()=='{' && close>0) { + int which = format.get_substr(1, close-ins).atoi(); + strl_t prev = length; + length = _strmod_insert(string, length, cap, args[which], pos); + pos += length - prev; + format += close-ins+1; + } + } + return length; +} + +// remove all instances of a character from a string +strl_t _strmod_remove(char *string, strl_t length, strl_t cap, char a) +{ + char *scan = string; + strl_t left = length; + while (left && *scan!=a) { + left--; + scan++; + } + if (left) { + strl_t n = left; + char *write = scan; + while (left) { + while (left && *scan==a) { + left--; + scan++; + } + while (left && *scan!=a) { + *write++ = *scan++; + left--; + n--; + } + } + length -= n; + } + return length; +} + +// remove a substring from a string +strl_t _strmod_remove(char *string, strl_t length, strl_t cap, strl_t start, strl_t len) +{ + if (startlength) + len = length-start; + int left = length-start-len; + if (left>0) { + const char *source = string+start+len; + char *dest = string+start; + for (int i = left; i; i--) + *dest++ = *source++; + } + length = length-len; + } + return length; +} + +// exchange a substring +strl_t _strmod_exchange(char *string, strl_t length, strl_t cap, strl_t start, strl_t size, const strref insert) +{ + if (start > length) + return length; + + if ((start + size) > length) + size = length - start; + + strl_t copy = insert.get_len(); + if ((start + copy) > cap) + copy = cap - start; + + if (copy < size) { + strl_t rem = size - insert.get_len(); + length = _strmod_remove(string, length, cap, start+size-rem, rem); + } else if (copy > size) { + strl_t ins = insert.get_len() - size; + strl_t left = length - size - start; + char *end = string + length + ins; + char *orig = string + length; + while (left--) + *--end = *--orig; + length += ins; + } + memcpy(string + start, insert.get(), copy); + return length; +} + + +// search and replace occurences of a string within a string +strl_t _strmod_inplace_replace_int(char 
*string, strl_t length, strl_t cap, const strref a, const strref b) +{ + char *scan = string; + strl_t left = length; + strl_t c = cap; + strl_t len_a = a.get_len(), len_b = b.get_len(); + if (len_a>left || !len_a) + return left; + + char *ps = scan, *pd = scan; + if (len_a>=len_b) { + int ss = strref(ps, left-strl_t(ps-scan)).find(a); + if (ss>=0) { + pd += ss; + ps += ss; + while (ss>=0 && strl_t(ss)c) + return left; // didn't fit in space + int ss = strref(scan, left).find_last(a); + int se = left; + pd += nl; + ps += left; + while (ss>=0) { + int cp = se-ss-len_a; + while (cp--) + *--pd = *--ps; + ps -= len_a; + const char *be = b.get()+len_b; + cp = len_b; + while (cp--) + *--pd = *--be; + se = ss; + ss = strref(scan, se).find_last(a); + } + return nl; + } + return left; +} + +// search and replace occurences of a string within a string +strl_t _strmod_inplace_replace_bookend_int(char *string, strl_t length, strl_t cap, const strref a, const strref b, const strref bookend) +{ + char *scan = string; + strl_t left = length; + strl_t c = cap; + strl_t len_a = a.get_len(), len_b = b.get_len(); + if (len_a>left || !len_a) + return left; + + char *ps = scan, *pd = scan; + if (len_a >= len_b) { + int ss = strref(ps, left - strl_t(ps - scan)).find_bookend(a, bookend); + if (ss >= 0) { + pd += ss; + ps += ss; + while (ss >= 0 && strl_t(ss)c) + return left; // didn't fit in space + int ss = strref(scan, left).find_last_bookend(a, bookend); + int se = left; + pd += nl; + ps += left; + while (ss >= 0) { + int cp = se - ss - len_a; + while (cp--) + *--pd = *--ps; + ps -= len_a; + if (b.get()) { + const char *be = b.get() + len_b; + cp = len_b; + while (cp--) + *--pd = *--be; + } + se = ss; + ss = strref(scan, se).find_last_bookend(a, bookend); + } + return nl; + } + return left; +} + +// convert a string to lowercase (7 bit ascii) +void _strmod_tolower(char *string, strl_t length) +{ + if (string) { + char *s = string; + for (int left = length; left>0; left--) { + *s = 
int_tolower_ascii7(*s); + s++; + } + } +} + +// convert a string to lowercase (windows extended ascii) +void _strmod_tolower_win_ascii(char *string, strl_t length) +{ + if (string) { + char *s = string; + for (int left = length; left>0; left--) { + *s = int_tolower_win_ascii(*s); + s++; + } + } +} + +// convert a string to lowercase (amiga extended ascii) +void _strmod_tolower_amiga_ascii(char *string, strl_t length) +{ + if (string) { + char *scan = string; + for (int left = length; left>0; left--) { + *scan = int_tolower_amiga_ascii(*scan); + scan++; + } + } +} + +// convert a string to lowercase (mac os extended ascii) +void _strmod_tolower_macos_ascii(char *string, strl_t length) +{ + if (string) { + char *scan = string; + for (int r = length; r>0; r--) { + *scan = int_tolower_macos_roman_ascii(*scan); + scan++; + } + } +} + +// convert a string to uppercase +void _strmod_toupper(char *string, strl_t length) +{ + if (string) { + char *scan = string; + for (int left = length; left>0; left--) { + *scan = int_toupper_ascii7(*scan); + scan++; + } + } +} + +// convert a string to uppercase +void _strmod_toupper_win_ascii(char *string, strl_t length) +{ + if (string) { + char *scan = string; + for (int left = length; left>0; left--) { + *scan = int_toupper_win_ascii(*scan); + scan++; + } + } +} + +// convert a string to uppercase +void _strmod_toupper_amiga_ascii(char *string, strl_t length) +{ + if (string) { + char *scan = string; + for (int left = length; left>0; left--) { + *scan = int_toupper_amiga_ascii(*scan); + scan++; + } + } +} + + +// convert a string to uppercase +void _strmod_toupper_macos_ascii(char *string, strl_t length) +{ + if (string) { + char *scan = string; + for (int left = length; left>0; left--) { + *scan = int_toupper_macos_roman_ascii(*scan); + scan++; + } + } +} + +strl_t _strmod_copy(char *string, strl_t cap, const char *str) +{ + strl_t length = 0; + if (str) { + while (*str && length < cap) + string[length++] = *str++; + } + return 
length; +} + +strl_t _strmod_copy(char *string, strl_t cap, strref str) +{ + strl_t length = 0; + if (str.valid()) { + const char *_str = str.get(); + for (strl_t len = str.get_len(); len && lengthlength) + chars = length - src; + if ((dst+chars)>cap) + chars = cap - dst; + char *ps = string+src, *pd = string+dst; + if (src>dst) { + while (chars--) + *pd++ = *ps++; + } else { + pd += chars; + ps += chars; + while (chars--) + *--pd = *--ps; + } + } +} + +void _strmod_shift(char *string, int offs, int len) { + char *dest = string + offs; + if (offs>0) { + string += len; + dest += len; + while (len--) + *--dest = *--string; + } else { + while (len--) + *dest++ = *string++; + } +} + +int _strmod_read_utf8(char *string, strl_t length, strl_t pos, strl_t &skip) { + if (pos >= length) { + skip = 0; + return 0; + } + + string += pos; + length -= pos; + const char *start = string; + const char *end = string + length; + unsigned int c = (unsigned int)*string++; + c &= 0x7f; + for (unsigned int m = 0x40; (m & c) && string=cap) + return 0; + char *write = string + pos; + cap -= pos; + if (code < 0x80) { + *write++ = code; + return 1; + } else if (cap>=2 && code < 0x800) { + *write++ = 0xc0 | (code >> 6); + *write++ = 0x80 | (code & 0x3f); + return 2; + } else if (cap>=3 && code < 0x10000) { + *write++ = 0xe0 | (code >> 12); + *write++ = 0x80 | ((code >> 6) & 0x3f); + *write++ = 0x80 | (code & 0x3f); + return 3; + } else if (cap>=4) { + *write++ = 0xf0 | ((code >> 18) & 7); + *write++ = 0x80 | ((code >> 12) & 0x3f); + *write++ = 0x80 | ((code >> 6) & 0x3f); + *write++ = 0x80 | (code & 0x3f); + return 4; + } + return 0; +} + +strl_t _strmod_utf8_tolower(char *string, strl_t length, strl_t cap) { + char *scan = string; + char *write = string; + char *end = string + length; + + while (scan cap) + return (strl_t)(write-string); + + // need to make room for new character code + if ((write+add)>scan) { + int m = (int)((write+add) - scan); + _strmod_shift(scan, m, (int)(end-scan)); + 
scan += m; + end += m; + } + skip = _strmod_write_utf8(write, cap, c, 0); + write += skip; + cap -= skip; + } + return (strl_t)(end-string); +} + +strl_t _strmod_utf8_toupper(char *string, strl_t length, strl_t cap) { + char *scan = string; + char *write = string; + char *end = string + length; + + while (scan cap) + return (strl_t)(write-string); + + // need to make room for new character code? + if ((write+add)>scan) { + int m = (int)((write+add) - scan); + _strmod_shift(scan, m, (int)(end-scan)); + scan += m; + end += m; + } + skip = _strmod_write_utf8(write, cap, c, 0); + write += skip; + cap -= skip; + } + return (strl_t)(end-string); +} + +#endif // STRUSE_IMPLEMENTATION + +/* revision history + 0.990 (2015-09-14) first public version + 1.000 (2015-09-15) added XML parser sample + 1.001 (2015-09-16) cleaned up XML parser + 1.002 (2015-09-17) added JSON parser sample + 1.003 (2015-09-20) straightening up of things + - wildcard add rewind & retry for multi step search, this caused valid finds to be ignored if invalid sub find occured + - fixed some minor wildcard search bugs, including word end including an extra character (whitespace) + - slightly more compact implementation, combining common code segments into static functions + - next_line() will return empty lines to match actual line count, line() works as before (returns only nonempty lines) + 1.004 (2015-09-22) added text file diff / patch sample + 1.005 (2015-09-28) added 6502 macro assembler sample + 1.006 (2015-10-04) added get_line() to get first line, fixed errors with getting hex/binary value+skip (ahextoui_skip, abinarytoui_skip) +*/ + +#endif // __STRUSE_H__