From d65ab594619b66b3770eaa1b85acd0f05604c35a Mon Sep 17 00:00:00 2001
From: Andy McFadden <fadden@fadden.com>
Date: Sun, 1 Aug 2021 17:36:05 -0700
Subject: [PATCH] Don't reject strings with "invalid" characters

When formatting one or more strings with the Edit Data Operand dialog,
the code must determine which options to present.  If the selected
bytes appear to represent one or more null-terminated strings, that
option is enabled in the UI.

The "format recognizers" enforce some strict rules, e.g. null-
terminated strings must end in $00, and also try to confirm that the
data looks like a printable string.  The algorithm rejects strings
with "illegal" characters in them.  Deciding which characters are
illegal is simpler on some systems than others.  For example, C64
PETSCII defines quite a few control characters in ways that make them
useful for embedding in printable strings.

The "recognizers" are only used by the operand edit feature, not as
part of an automated string detector, so there's no real upside in
overriding the user's desire to form a string with arbitrary bytes.

This removes the quick rejection from the four recognizers (null-term,
len8, len16, dci).  It also compiles out the reverse-DCI string
counter with "#if false".  This change does not alter the high-level
code, which still insists on a certain percentage of the string being
printable; that may be worth revisiting as well.

(issue #100)
---
 SourceGen/DataAnalysis.cs | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/SourceGen/DataAnalysis.cs b/SourceGen/DataAnalysis.cs
index 499632e..6247e37 100644
--- a/SourceGen/DataAnalysis.cs
+++ b/SourceGen/DataAnalysis.cs
@@ -1031,8 +1031,6 @@ namespace SourceGen {
         /// Counts the number of null-terminated strings in the buffer.
         /// 
         /// Zero-length strings are allowed but not included in the count.
-        /// 
-        /// If any bad data is found, the scan aborts and returns -1.
         /// </summary>
         /// <param name="fileData">Raw data.</param>
         /// <param name="start">Offset of first byte in range.</param>
@@ -1070,10 +1068,10 @@ namespace SourceGen {
                             return -1;
                         }
                     }
-                    if (!charTest(val)) {
-                        // Not a matching character, fail.
-                        return -1;
-                    }
+                    //if (!charTest(val)) {
+                    //    // Not a matching character, fail.
+                    //    return -1;
+                    //}
                     stringLen++;
                 }
             }
@@ -1120,10 +1118,10 @@ namespace SourceGen {
                         // Mixed ASCII, fail.
                         return -1;
                     }
-                    if (!charTest(val)) {
-                        // Not a matching character, fail.
-                        return -1;
-                    }
+                    //if (!charTest(val)) {
+                    //    // Not a matching character, fail.
+                    //    return -1;
+                    //}
                 }
             }
 
@@ -1175,10 +1173,10 @@ namespace SourceGen {
                         // Mixed ASCII, fail.
                         return -1;
                     }
-                    if (!charTest(val)) {
-                        // Not a matching character, fail.
-                        return -1;
-                    }
+                    //if (!charTest(val)) {
+                    //    // Not a matching character, fail.
+                    //    return -1;
+                    //}
                 }
             }
 
@@ -1226,15 +1224,16 @@ namespace SourceGen {
                     stringLen++;
                 }
 
-                if (!charTest((byte)(val & 0x7f))) {
-                    // Not a matching character, fail.
-                    return -1;
-                }
+                //if (!charTest((byte)(val & 0x7f))) {
+                //    // Not a matching character, fail.
+                //    return -1;
+                //}
             }
 
             return stringCount;
         }
 
+#if false
         /// <summary>
         /// Counts strings in reverse Dextral Character Inverted format, meaning the string is
         /// stored in reverse order in memory, and the high bit on the first (last) byte is
@@ -1280,6 +1279,7 @@ namespace SourceGen {
 
             return stringCount;
         }
+#endif
 
         /// <summary>
         /// Verifies that the string data is what is expected.  Does not attempt to check