From e367471c534717a834b04d4295731046f5dc867a Mon Sep 17 00:00:00 2001 From: marcobaye Date: Sun, 8 Sep 2024 09:55:00 +0000 Subject: [PATCH] refactored string handling git-svn-id: https://svn.code.sf.net/p/acme-crossass/code-0/trunk@423 4df02467-bbd4-4a76-a152-e7ce94205b78 --- src/alu.c | 9 +--- src/flow.c | 20 +------ src/input.c | 124 +++++++++++++++++++++++++++----------------- src/input.h | 12 ++--- src/pseudoopcodes.c | 42 ++++++--------- src/version.h | 2 +- 6 files changed, 103 insertions(+), 106 deletions(-) diff --git a/src/alu.c b/src/alu.c index 0e9d9aa..745897a 100644 --- a/src/alu.c +++ b/src/alu.c @@ -418,16 +418,9 @@ static void parse_quoted(char closing_quote) { intval_t value; - dynabuf_clear(GlobalDynaBuf); - if (input_quoted_to_dynabuf(closing_quote)) + if (input_read_string(closing_quote)) goto fail; // unterminated or escaping error - // eat closing quote - GetByte(); - // now convert to unescaped version - if (input_unescape_dynabuf()) - goto fail; // escaping error - // without backslash escaping, both ' and " are used for single // characters. // with backslash escaping, ' is for characters and " is for strings: diff --git a/src/flow.c b/src/flow.c index 41bdcd8..673e264 100644 --- a/src/flow.c +++ b/src/flow.c @@ -144,26 +144,10 @@ void flow_forloop(struct for_loop *loop) // read condition, make copy, link to struct -// FIXME - change to some input_line_getcopy() fn, like input_block_getcopy()! +// FIXME - remove! static void copy_condition(struct condition *condition, char terminator) { - int err; - - SKIPSPACE(); - dynabuf_clear(GlobalDynaBuf); - while ((GotByte != terminator) && (GotByte != CHAR_EOS)) { - // append to GlobalDynaBuf and check for quotes - DYNABUF_APPEND(GlobalDynaBuf, GotByte); - if ((GotByte == '"') || (GotByte == '\'')) { - err = input_quoted_to_dynabuf(GotByte); - // here GotByte changes, it might become CHAR_EOS - DYNABUF_APPEND(GlobalDynaBuf, GotByte); // add closing quotes (or CHAR_EOS) as well - if (err) - break; // on error, exit before eating CHAR_EOS via GetByte() - } - GetByte(); - } - dynabuf_append(GlobalDynaBuf, CHAR_EOS); // ensure terminator + input_read_statement(terminator); condition->block.body = dynabuf_get_copy(GlobalDynaBuf); } diff --git a/src/input.c b/src/input.c index ed765b8..7be187b 100644 --- a/src/input.c +++ b/src/input.c @@ -459,7 +459,7 @@ fail: // This function delivers the next byte from the currently active byte source // in shortened high-level format. FIXME - use fn ptr? -// When inside quotes, use input_quoted_to_dynabuf() instead! +// do not use inside quotes! // CAUTION, symbol substitutions cause this fn to be called recursively! char GetByte(void) { @@ -569,23 +569,6 @@ static void get_quoted_byte(void) throw_error("Quotes still open at end of line."); } -// skip remainder of statement, for example on error -void parser_skip_remainder(void) -{ - // read characters until end-of-statement, but check for quotes, - // otherwise this might treat a quoted colon like EOS! - dynabuf_clear(GlobalDynaBuf); - while (GotByte != CHAR_EOS) { - // check for quotes - if ((GotByte == '"') || (GotByte == '\'')) { - if (input_quoted_to_dynabuf(GotByte)) - break; // error (CHAR_EOS before closing quote) - } - GetByte(); - } - dynabuf_clear(GlobalDynaBuf); -} - // ensure that the remainder of the current statement is empty, for example // after mnemonics using implied addressing. void parser_ensure_EOS(void) // now GotByte = first char to test @@ -605,8 +588,8 @@ void parser_ensure_EOS(void) // now GotByte = first char to test } // read string to dynabuf until closing quote is found -// returns 1 on errors (unterminated, escaping error) -int input_quoted_to_dynabuf(char closing_quote) +// returns 1 on error (unterminated) +static int quoted_to_dynabuf(char closing_quote) { boolean escaped = FALSE; @@ -617,14 +600,15 @@ int input_quoted_to_dynabuf(char closing_quote) return 1; // unterminated string constant; get_quoted_byte will have complained already if (escaped) { - // previous byte was backslash, so do not check for terminator nor backslash + // previous byte was backslash, so do not check for closing quote nor backslash escaped = FALSE; - // do not actually _convert_ escape sequences to their target byte, that is done by input_unescape_dynabuf() below! + // do not actually _convert_ escape sequences, that is + // done in input_read_string() below! // TODO - but maybe check for illegal escape sequences? // at the moment checking is only done when the string // gets used for something... } else { - // non-escaped: only terminator and backslash are of interest + // non-escaped: only closing quote and backslash are of interest if (GotByte == closing_quote) return 0; // ok @@ -635,19 +619,28 @@ int input_quoted_to_dynabuf(char closing_quote) } } -// process backslash escapes in GlobalDynaBuf (so size might shrink) -// returns 1 on errors (escaping errors) -// TODO - check: if this is only ever called directly after input_quoted_to_dynabuf, integrate that call here? -int input_unescape_dynabuf(void) +// clear dynabuf, read string to it until closing quote is found, then +// process backslash escapes (so size might shrink) +// returns 1 on error (unterminated or escaping error) +int input_read_string(char closing_quote) { - int read_index = 0, - write_index = 0; + int read_index, + write_index; char byte; boolean escaped; - if (config.dialect < V0_97__BACKSLASH_ESCAPING) - return 0; // ok + dynabuf_clear(GlobalDynaBuf); + if (quoted_to_dynabuf(closing_quote)) + return 1; // unterminated + // eat closing quote + GetByte(); +// now un-escape dynabuf contents: + if (config.dialect < V0_97__BACKSLASH_ESCAPING) + return 0; // ok (no escaping anyway) + + read_index = 0; + write_index = 0; escaped = FALSE; // CAUTION - contents of dynabuf are not terminated: while (read_index < GlobalDynaBuf->size) { @@ -690,6 +683,47 @@ int input_unescape_dynabuf(void) return 0; // ok } + +// skip remainder of statement, for example on error +// FIXME - compare to fn below! merge? +void parser_skip_remainder(void) +{ + // read characters until end-of-statement, but check for quotes, + // otherwise this might treat a quoted colon like EOS! + dynabuf_clear(GlobalDynaBuf); + while (GotByte != CHAR_EOS) { + // check for quotes + if ((GotByte == '"') || (GotByte == '\'')) { + if (quoted_to_dynabuf(GotByte)) + break; // error (CHAR_EOS before closing quote) + } + GetByte(); + } + dynabuf_clear(GlobalDynaBuf); +} + +// clear dynabuf, read remainder of statement into it, making sure to keep quoted stuff intact +void input_read_statement(char terminator) +{ + int err; + + SKIPSPACE(); + dynabuf_clear(GlobalDynaBuf); + while ((GotByte != terminator) && (GotByte != CHAR_EOS)) { + // append to GlobalDynaBuf and check for quotes + DYNABUF_APPEND(GlobalDynaBuf, GotByte); + if ((GotByte == '"') || (GotByte == '\'')) { + err = quoted_to_dynabuf(GotByte); + // here GotByte changes, it might become CHAR_EOS + DYNABUF_APPEND(GlobalDynaBuf, GotByte); // add closing quotes (or CHAR_EOS) as well + if (err) + break; // on error, exit before eating CHAR_EOS via GetByte() + } + GetByte(); + } + dynabuf_append(GlobalDynaBuf, CHAR_EOS); // ensure terminator +} + // Read block into GlobalDynabuf // (reading starts with next byte, so call directly after reading opening brace). // After calling this function, GotByte holds '}'. Unless EOF was found first, @@ -716,7 +750,7 @@ static void block_to_dynabuf(void) case '"': // Quotes? Okay, read quoted stuff. case '\'': - input_quoted_to_dynabuf(byte); + quoted_to_dynabuf(byte); DYNABUF_APPEND(GlobalDynaBuf, GotByte); // add closing quote break; case CHAR_SOB: @@ -870,10 +904,6 @@ static int read_filename_shared_end(boolean *absolute) return 1; // error } - // resolve backslash escapes - if (input_unescape_dynabuf()) - return 1; // escaping error - // terminate string dynabuf_append(GlobalDynaBuf, '\0'); // add another zero byte to make sure the buffer is large enough so the @@ -900,16 +930,14 @@ static int read_filename_shared_end(boolean *absolute) // parser_skip_remainder() then. int input_read_input_filename(struct filespecflags *flags) { - dynabuf_clear(GlobalDynaBuf); SKIPSPACE(); if (GotByte == '<') { // library access: flags->uses_lib = TRUE; // read file name string (must be a single string ) - if (input_quoted_to_dynabuf('>')) + if (input_read_string('>')) return 1; // unterminated or escaping error - GetByte(); // eat '>' terminator } else { // "normal", non-library access: flags->uses_lib = FALSE; @@ -919,17 +947,16 @@ int input_read_input_filename(struct filespecflags *flags) return 1; // error } // read file name string - if (input_quoted_to_dynabuf('"')) + if (input_read_string('"')) return 1; // unterminated or escaping error - GetByte(); // eat terminator // new algo: (FIXME) // it should be possible to construct the name of input file from symbols, so // build environments can define a name at one place and use it at another. // FIXME - use expression parser to read filename string! } - // check length, remember abs/rel, unescape, terminate, do platform conversion + // check length, remember abs/rel, terminate, do platform conversion return read_filename_shared_end(&flags->absolute); } @@ -1045,15 +1072,16 @@ int input_read_output_filename(void) throw_error("File name quotes not found (\"\")."); return 1; // error } - dynabuf_clear(GlobalDynaBuf); - // read file name string (must be a single string literal! do not change this!) - if (input_quoted_to_dynabuf('"')) + // read file name string (must be a single string literal! do not call + // the expression parser instead; run-time-determined file names should + // only be possible for input files. dear reader, please do not abuse + // the symbol expansion mechanism for this purpose :D) + if (input_read_string('"')) return 1; // unterminated or escaping error - GetByte(); // eat terminator - // check length, remember abs/rel, unescape, terminate, do platform conversion: + // check length, remember abs/rel, terminate, do platform conversion: if (read_filename_shared_end(&absolute)) - return 1; // empty string or escaping error + return 1; // empty string if (absolute) { // keep file name as it is diff --git a/src/input.h b/src/input.h index 4047199..9ff0dce 100644 --- a/src/input.h +++ b/src/input.h @@ -71,13 +71,13 @@ extern void parser_skip_remainder(void); // after mnemonics using implied addressing. extern void parser_ensure_EOS(void); -// read string to dynabuf until closing quote is found -// returns 1 on errors (unterminated, escaping error) -extern int input_quoted_to_dynabuf(char closing_quote); +// clear dynabuf, read string to it until closing quote is found, then process +// backslash escapes (so size might shrink) +// returns 1 on error (unterminated or escaping errors) +extern int input_read_string(char closing_quote); -// process backslash escapes in GlobalDynaBuf (so size might shrink) -// returns 1 on errors (escaping errors) -extern int input_unescape_dynabuf(void); +// clear dynabuf, read remainder of statement into it, making sure to keep quoted stuff intact +extern void input_read_statement(char terminator); // Skip block (starting with next byte, so call directly after reading opening brace). // After calling this function, GotByte holds '}'. Unless EOF was found first, diff --git a/src/pseudoopcodes.c b/src/pseudoopcodes.c index e62c47e..b427b5b 100644 --- a/src/pseudoopcodes.c +++ b/src/pseudoopcodes.c @@ -470,23 +470,18 @@ static enum eos encode_string(const struct encoder *inner_encoder, unsigned char // make given encoder the current one (for ALU-parsed values) encoder_current = inner_encoder; do { - // we need to keep the old string handler code, because if user selects - // older dialect, the new code will complain about string lengths > 1! + // we need to keep the old code for handling string literals, + // because if the user chooses a dialect < 0.97, the new code in + // the expression parser will complain about strings longer than + // one character -> string literals in old sources would stop to + // work! + // FIXME - there is another block like this, scan for ROOSTA! if ((GotByte == '"') && (config.dialect < V0_97__BACKSLASH_ESCAPING)) { // the old way of handling string literals: int offset; - dynabuf_clear(GlobalDynaBuf); - if (input_quoted_to_dynabuf('"')) - return SKIP_REMAINDER; // unterminated or escaping error - - // eat closing quote - GetByte(); - // now convert to unescaped version - // FIXME - next call does nothing because wantedsize; ++offset) @@ -1363,23 +1358,20 @@ static enum eos throw_src_string(enum debuglevel level, const char prefix[]) dynabuf_clear(user_message); dynabuf_add_string(user_message, prefix); do { + // we need to keep the old code for handling string literals, + // because if the user chooses a dialect < 0.97, the new code in + // the expression parser will complain about strings longer than + // one character -> string literals in old sources would stop to + // work! + // FIXME - there is another block like this, scan for ROOSTA! if ((GotByte == '"') && (config.dialect < V0_97__BACKSLASH_ESCAPING)) { - dynabuf_clear(GlobalDynaBuf); - if (input_quoted_to_dynabuf('"')) - return SKIP_REMAINDER; // unterminated or escaping error - - // eat closing quote - GetByte(); - // now convert to unescaped version - // FIXME - next call does nothing because wantedprint(&object, user_message); } diff --git a/src/version.h b/src/version.h index 0bfce42..a4d9868 100644 --- a/src/version.h +++ b/src/version.h @@ -9,7 +9,7 @@ #define RELEASE "0.97" // update before release FIXME #define CODENAME "Zem" // update before release -#define CHANGE_DATE "25 Aug" // update before release FIXME +#define CHANGE_DATE "26 Aug" // update before release FIXME #define CHANGE_YEAR "2024" // update before release //#define HOME_PAGE "http://home.pages.de/~mac_bacon/smorbrod/acme/" #define HOME_PAGE "http://sourceforge.net/p/acme-crossass/" // FIXME