From d65ab594619b66b3770eaa1b85acd0f05604c35a Mon Sep 17 00:00:00 2001 From: Andy McFadden Date: Sun, 1 Aug 2021 17:36:05 -0700 Subject: [PATCH] Don't reject strings with "invalid" characters When formatting one or more strings with the Edit Data Operand dialog, the code must determine which options to present. If the selected bytes appear to represent one or more null-terminated strings, that option is enabled in the UI. The "format recognizers" enforce some strict rules, e.g. null-terminated strings must end in $00, and also try to confirm that the data looks like a printable string. The algorithm rejects strings with "illegal" characters in them. This is simpler on some systems than others. For example, C64 PETSCII defines quite a few control characters in ways that make them useful for embedding in printable strings. The "recognizers" are only used by the operand edit feature, not as part of an automated string detector, so there's no real upside in overriding the user's desire to form a string with arbitrary bytes. This removes the quick rejection from the four recognizers (null-term, len8, len16, dci). It does not alter the high-level code, which still insists on a certain percentage of the string being printable; that may be worth revisiting as well. (issue #100) --- SourceGen/DataAnalysis.cs | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/SourceGen/DataAnalysis.cs b/SourceGen/DataAnalysis.cs index 499632e..6247e37 100644 --- a/SourceGen/DataAnalysis.cs +++ b/SourceGen/DataAnalysis.cs @@ -1031,8 +1031,6 @@ namespace SourceGen { /// Counts the number of null-terminated strings in the buffer. /// /// Zero-length strings are allowed but not included in the count. - /// - /// If any bad data is found, the scan aborts and returns -1. /// /// Raw data. /// Offset of first byte in range. @@ -1070,10 +1068,10 @@ namespace SourceGen { return -1; } } - if (!charTest(val)) { - // Not a matching character, fail. 
- return -1; - } + //if (!charTest(val)) { + // // Not a matching character, fail. + // return -1; + //} stringLen++; } } @@ -1120,10 +1118,10 @@ namespace SourceGen { // Mixed ASCII, fail. return -1; } - if (!charTest(val)) { - // Not a matching character, fail. - return -1; - } + //if (!charTest(val)) { + // // Not a matching character, fail. + // return -1; + //} } } @@ -1175,10 +1173,10 @@ namespace SourceGen { // Mixed ASCII, fail. return -1; } - if (!charTest(val)) { - // Not a matching character, fail. - return -1; - } + //if (!charTest(val)) { + // // Not a matching character, fail. + // return -1; + //} } } @@ -1226,15 +1224,16 @@ namespace SourceGen { stringLen++; } - if (!charTest((byte)(val & 0x7f))) { - // Not a matching character, fail. - return -1; - } + //if (!charTest((byte)(val & 0x7f))) { + // // Not a matching character, fail. + // return -1; + //} } return stringCount; } +#if false /// /// Counts strings in reverse Dextral Character Inverted format, meaning the string is /// stored in reverse order in memory, and the high bit on the first (last) byte is @@ -1280,6 +1279,7 @@ namespace SourceGen { return stringCount; } +#endif /// /// Verifies that the string data is what is expected. Does not attempt to check