diff --git a/SourceGen/AsmGen/AsmAcme.cs b/SourceGen/AsmGen/AsmAcme.cs
index 6f08de9..a226af7 100644
--- a/SourceGen/AsmGen/AsmAcme.cs
+++ b/SourceGen/AsmGen/AsmAcme.cs
@@ -540,7 +540,11 @@ namespace SourceGen.AsmGen {
             Debug.Assert(dfd.IsString);
             Debug.Assert(dfd.Length > 0);
 
-            bool highAscii = false;
+            if (dfd.FormatSubType == FormatDescriptor.SubType.HighAscii) {
+                OutputNoJoy(offset, dfd.Length, labelStr, commentStr);
+                return;
+            }
+
             int leadingBytes = 0;
 
             switch (dfd.FormatType) {
@@ -548,18 +552,11 @@ namespace SourceGen.AsmGen {
                 case FormatDescriptor.Type.StringReverse:
                 case FormatDescriptor.Type.StringNullTerm:
                 case FormatDescriptor.Type.StringDci:
-                    highAscii = (data[offset] & 0x80) != 0;
                     break;
                 case FormatDescriptor.Type.StringL8:
-                    if (dfd.Length > 1) {
-                        highAscii = (data[offset + 1] & 0x80) != 0;
-                    }
                     leadingBytes = 1;
                     break;
                 case FormatDescriptor.Type.StringL16:
-                    if (dfd.Length > 2) {
-                        highAscii = (data[offset + 2] & 0x80) != 0;
-                    }
                     leadingBytes = 2;
                     break;
                 default:
@@ -567,11 +564,6 @@ namespace SourceGen.AsmGen {
                     return;
             }
 
-            if (highAscii) {
-                OutputNoJoy(offset, dfd.Length, labelStr, commentStr);
-                return;
-            }
-
             StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, '"',
                 StringOpFormatter.RawOutputStyle.CommaSep, MAX_OPERAND_LEN,
                 CharEncoding.ConvertLowAscii);
diff --git a/SourceGen/AsmGen/AsmCc65.cs b/SourceGen/AsmGen/AsmCc65.cs
index 1a4164b..5677333 100644
--- a/SourceGen/AsmGen/AsmCc65.cs
+++ b/SourceGen/AsmGen/AsmCc65.cs
@@ -595,7 +595,6 @@ namespace SourceGen.AsmGen {
             Debug.Assert(dfd.IsString);
             Debug.Assert(dfd.Length > 0);
 
-            bool highAscii = false;
             int leadingBytes = 0;
             int trailingBytes = 0;
 
@@ -603,22 +602,14 @@ namespace SourceGen.AsmGen {
                 case FormatDescriptor.Type.StringGeneric:
                 case FormatDescriptor.Type.StringReverse:
                 case FormatDescriptor.Type.StringDci:
-                    highAscii = (data[offset] & 0x80) != 0;
                     break;
                 case FormatDescriptor.Type.StringNullTerm:
-                    highAscii = (data[offset] & 0x80) != 0;
                     trailingBytes = 1;
                     break;
                 case FormatDescriptor.Type.StringL8:
-                    if (dfd.Length > 1) {
-                        highAscii = (data[offset + 1] & 0x80) != 0;
-                    }
                     leadingBytes = 1;
                     break;
                 case FormatDescriptor.Type.StringL16:
-                    if (dfd.Length > 2) {
-                        highAscii = (data[offset + 2] & 0x80) != 0;
-                    }
                     leadingBytes = 2;
                     break;
                 default:
@@ -626,6 +617,7 @@ namespace SourceGen.AsmGen {
                     return;
             }
 
+            bool highAscii = (dfd.FormatSubType == FormatDescriptor.SubType.HighAscii);
             if (highAscii && dfd.FormatType != FormatDescriptor.Type.StringGeneric) {
                 OutputNoJoy(offset, dfd.Length, labelStr, commentStr);
                 return;
@@ -639,8 +631,7 @@ namespace SourceGen.AsmGen {
             }
 
             StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, '"',
-                StringOpFormatter.RawOutputStyle.CommaSep, MAX_OPERAND_LEN,
-                charConv);
+                StringOpFormatter.RawOutputStyle.CommaSep, MAX_OPERAND_LEN, charConv);
             stropf.FeedBytes(data, offset, dfd.Length - trailingBytes, leadingBytes, false);
 
             string opcodeStr = formatter.FormatPseudoOp(sDataOpNames.StrGeneric);
diff --git a/SourceGen/AsmGen/AsmMerlin32.cs b/SourceGen/AsmGen/AsmMerlin32.cs
index e41666f..2478bb1 100644
--- a/SourceGen/AsmGen/AsmMerlin32.cs
+++ b/SourceGen/AsmGen/AsmMerlin32.cs
@@ -479,7 +479,6 @@ namespace SourceGen.AsmGen {
             Debug.Assert(dfd.IsString);
             Debug.Assert(dfd.Length > 0);
 
-            bool highAscii = false;
             bool reverse = false;
             int leadingBytes = 0;
             string opcodeStr;
@@ -487,16 +486,13 @@ namespace SourceGen.AsmGen {
             switch (dfd.FormatType) {
                 case FormatDescriptor.Type.StringGeneric:
                     opcodeStr = sDataOpNames.StrGeneric;
-                    highAscii = (data[offset] & 0x80) != 0;
                     break;
                 case FormatDescriptor.Type.StringReverse:
                     opcodeStr = sDataOpNames.StrReverse;
-                    highAscii = (data[offset] & 0x80) != 0;
                     reverse = true;
                     break;
                 case FormatDescriptor.Type.StringNullTerm:
                     opcodeStr = sDataOpNames.StrGeneric;        // no pseudo-op for this
-                    highAscii = (data[offset] & 0x80) != 0;
                     if (dfd.Length == 1) {
                         // Empty string.  Just output the length byte(s) or null terminator.
                         GenerateShortSequence(offset, 1, out string opcode, out string operand);
@@ -506,37 +502,29 @@ namespace SourceGen.AsmGen {
                     break;
                 case FormatDescriptor.Type.StringL8:
                     opcodeStr = sDataOpNames.StrLen8;
-                    if (dfd.Length > 1) {
-                        highAscii = (data[offset + 1] & 0x80) != 0;
-                    }
                     leadingBytes = 1;
                     break;
                 case FormatDescriptor.Type.StringL16:
                     opcodeStr = sDataOpNames.StrLen16;
-                    if (dfd.Length > 2) {
-                        highAscii = (data[offset + 2] & 0x80) != 0;
-                    }
                     leadingBytes = 2;
                     break;
                 case FormatDescriptor.Type.StringDci:
                     opcodeStr = sDataOpNames.StrDci;
-                    highAscii = (data[offset] & 0x80) != 0;
                     break;
                 default:
                     Debug.Assert(false);
                     return;
             }
 
-            // Merlin 32 uses single-quote for low ASCII, double-quote for high ASCII.  When
-            // quoting the delimiter we use a hexadecimal value.  We need to bear in mind that
-            // we're forcing the characters to low ASCII, but the actual character being
-            // escaped might be in high ASCII.  Hence delim vs. delimReplace.
-            char delim = highAscii ? '"' : '\'';
+            // Merlin 32 uses single-quote for low ASCII, double-quote for high ASCII.
             CharEncoding.Convert charConv;
-            if (highAscii) {
+            char delim;
+            if (dfd.FormatSubType == FormatDescriptor.SubType.HighAscii) {
                 charConv = CharEncoding.ConvertHighAscii;
+                delim = '"';
             } else {
                 charConv = CharEncoding.ConvertLowAscii;
+                delim = '\'';
             }
 
             StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, delim,
diff --git a/SourceGen/AsmGen/AsmTass64.cs b/SourceGen/AsmGen/AsmTass64.cs
index 10bde47..7d0bb0a 100644
--- a/SourceGen/AsmGen/AsmTass64.cs
+++ b/SourceGen/AsmGen/AsmTass64.cs
@@ -532,51 +532,41 @@ namespace SourceGen.AsmGen {
             Debug.Assert(dfd.IsString);
             Debug.Assert(dfd.Length > 0);
 
-            bool highAscii = false;
             int hiddenLeadingBytes = 0;
             int shownLeadingBytes = 0;
             int trailingBytes = 0;
             string opcodeStr;
 
+            if (dfd.FormatSubType == FormatDescriptor.SubType.HighAscii) {
+                OutputNoJoy(offset, dfd.Length, labelStr, commentStr);
+                return;
+            }
+
             switch (dfd.FormatType) {
                 case FormatDescriptor.Type.StringGeneric:
                 case FormatDescriptor.Type.StringReverse:
                     opcodeStr = sDataOpNames.StrGeneric;
-                    highAscii = (data[offset] & 0x80) != 0;
                     break;
                 case FormatDescriptor.Type.StringNullTerm:
                     opcodeStr = sDataOpNames.StrNullTerm;
-                    highAscii = (data[offset] & 0x80) != 0;
                     trailingBytes = 1;
                     break;
                 case FormatDescriptor.Type.StringL8:
                     opcodeStr = sDataOpNames.StrLen8;
-                    if (dfd.Length > 1) {
-                        highAscii = (data[offset + 1] & 0x80) != 0;
-                    }
                     hiddenLeadingBytes = 1;
                     break;
                 case FormatDescriptor.Type.StringL16:
                     opcodeStr = sDataOpNames.StrGeneric;
-                    if (dfd.Length > 2) {
-                        highAscii = (data[offset + 2] & 0x80) != 0;
-                    }
                     shownLeadingBytes = 2;
                     break;
                 case FormatDescriptor.Type.StringDci:
                     opcodeStr = sDataOpNames.StrDci;
-                    highAscii = (data[offset] & 0x80) != 0;
                     break;
                 default:
                     Debug.Assert(false);
                     return;
             }
 
-            if (highAscii) {
-                OutputNoJoy(offset, dfd.Length, labelStr, commentStr);
-                return;
-            }
-
             StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, '"',
                 StringOpFormatter.RawOutputStyle.CommaSep, MAX_OPERAND_LEN,
                 CharEncoding.ConvertLowAscii);
diff --git a/SourceGen/DataAnalysis.cs b/SourceGen/DataAnalysis.cs
index 617169b..a5fb9ee 100644
--- a/SourceGen/DataAnalysis.cs
+++ b/SourceGen/DataAnalysis.cs
@@ -736,8 +736,10 @@ namespace SourceGen {
                 int asciiLen = RecognizeAscii(mFileData, start, end);
                 if (asciiLen >= minStringChars) {
                     LogV(start, "ASCII string, len=" + asciiLen + " bytes");
+                    bool isHigh = (mFileData[start] & 0x80) != 0;
                     mAnattribs[start].DataDescriptor = FormatDescriptor.Create(asciiLen,
-                        FormatDescriptor.Type.StringGeneric, FormatDescriptor.SubType.Ascii);
+                        FormatDescriptor.Type.StringGeneric, isHigh ?
+                        FormatDescriptor.SubType.HighAscii : FormatDescriptor.SubType.LowAscii);
                     start += asciiLen;
                     continue;
                 }
diff --git a/SourceGen/DisasmProject.cs b/SourceGen/DisasmProject.cs
index b505d86..ee6b542 100644
--- a/SourceGen/DisasmProject.cs
+++ b/SourceGen/DisasmProject.cs
@@ -308,12 +308,15 @@ namespace SourceGen {
         /// </summary>
         /// <param name="fileData">65xx data file contents.</param>
         /// <param name="dataFileName">Data file's filename (not pathname).</param>
-        public void SetFileData(byte[] fileData, string dataFileName) {
+        /// <param name="report">Reporting object for validation errors.</param>
+        public void SetFileData(byte[] fileData, string dataFileName, ref FileLoadReport report) {
             Debug.Assert(fileData.Length == FileDataLength);
             Debug.Assert(CRC32.OnWholeBuffer(0, fileData) == FileDataCrc32);
             mFileData = fileData;
             mDataFileName = dataFileName;
 
+            FixAndValidate(ref report);
+
 #if false
             ScanFileData();
 #endif
@@ -396,6 +399,98 @@ namespace SourceGen {
         }
 #endif
 
+        /// <summary>Resolves ASCII_GENERIC place-holder sub-types in OperandFormats to
+        /// LowAscii or HighAscii by inspecting the file data.  Validation is TODO.</summary>
+        /// <param name="report">Load report; not yet written to by this method.</param>
+        private void FixAndValidate(ref FileLoadReport report) {
+            Dictionary<int, FormatDescriptor> changes = new Dictionary<int, FormatDescriptor>();
+
+            foreach (KeyValuePair<int, FormatDescriptor> kvp in OperandFormats) {
+                FormatDescriptor dfd = kvp.Value;
+
+                // v1 project files specified string layouts as sub-types, and assumed they
+                // were high or low ASCII.  Numeric values could use the ASCII sub-type, which
+                // included both high and low.
+                //
+                // v2 project files changed this to make string layouts types, with the
+                // character encoding specified in the sub-type.  High and low ASCII became
+                // separate, explicitly specified items.
+                //
+                // When loading a v1 file, the old "Ascii" sub-type is deserialized to
+                // ASCII_GENERIC.  Now that we have access to the file data, we need to refine
+                // the sub-type to high or low.
+                if (dfd.FormatSubType == FormatDescriptor.SubType.ASCII_GENERIC) {
+                    FormatDescriptor newDfd;
+                    if (dfd.IsString) {
+                        // Determine the string encoding by looking at the first character.
+                        // For some strings (StringL8, StringL16) we need to skip forward a
+                        // byte or two.  Empty strings with lengths or null-termination will
+                        // be treated as low ASCII.
+                        int checkOffset = kvp.Key;
+                        if (dfd.FormatType == FormatDescriptor.Type.StringL8 && dfd.Length > 1) {
+                            checkOffset++;
+                        } else if (dfd.FormatType == FormatDescriptor.Type.StringL16 && dfd.Length > 2) {
+                            checkOffset += 2;
+                        }
+                        bool isHigh = (FileData[checkOffset] & 0x80) != 0;
+                        newDfd = FormatDescriptor.Create(dfd.Length, dfd.FormatType,
+                            isHigh ? FormatDescriptor.SubType.HighAscii :
+                                FormatDescriptor.SubType.LowAscii);
+                    } else if (dfd.IsNumeric) {
+                        // This is a character constant in an instruction or data operand, such
+                        // as ".dd1 'f'" or "LDA #'f'".  Could be multi-byte (even instructions
+                        // can be 16-bit).  This is a little awkward, because at this point we
+                        // can't tell the difference between instructions and data.
+                        //
+                        // However, we do know that instructions are always little-endian, that
+                        // opcodes are one byte, that data values > $ff can't be ASCII encoded,
+                        // and that $00 isn't a valid ASCII character.  So we can apply the
+                        // following test:
+                        // - if the length is 1, it's data; grab the first byte
+                        // - if it's NumericBE, it's data; grab the last byte
+                        // - if the second byte is $00, it's data; grab the first byte
+                        // - otherwise, it's an instruction; grab the second byte
+                        int checkOffset;
+                        if (dfd.FormatType == FormatDescriptor.Type.NumericBE) {
+                            Debug.Assert(dfd.Length <= FormatDescriptor.MAX_NUMERIC_LEN);
+                            checkOffset = kvp.Key + dfd.Length - 1;
+                        } else if (dfd.Length < 2 || FileData[kvp.Key + 1] == 0x00) {
+                            checkOffset = kvp.Key;
+                        } else {
+                            Debug.Assert(dfd.FormatType == FormatDescriptor.Type.NumericLE);
+                            checkOffset = kvp.Key + 1;
+                        }
+                        bool isHigh = (FileData[checkOffset] & 0x80) != 0;
+                        newDfd = FormatDescriptor.Create(dfd.Length, dfd.FormatType,
+                            isHigh ? FormatDescriptor.SubType.HighAscii :
+                                FormatDescriptor.SubType.LowAscii);
+                    } else {
+                        Debug.Assert(false);
+                        newDfd = dfd;
+                    }
+                    changes[kvp.Key] = newDfd;
+                    Debug.WriteLine("Fix +" + kvp.Key.ToString("x6") + ": " +
+                        dfd + " -> " + newDfd);
+                }
+            }
+
+            // Apply the collected changes to the main list now that the walk is complete.
+            foreach (KeyValuePair<int, FormatDescriptor> kvp in changes) {
+                OperandFormats[kvp.Key] = kvp.Value;
+                //report.Add(FileLoadItem.Type.Notice,
+                //    "Fixed format at +" + kvp.Key.ToString("x6"));
+            }
+
+            // TODO: validate strings
+            // - null-terminated strings must not have 0x00 bytes, except for the last byte,
+            //   which must be 0x00
+            // - the length stored in L8/L16 strings must match the format descriptor length
+            // - DCI strings must have the appropriate pattern for the high bit
+            //
+            // Note it is not required that string data match the encoding, since you're allowed
+            // to have random gunk mixed in.  It just can't violate the above rules.
+        }
+
         /// <summary>
         /// Loads platform symbol files and extension scripts.
         /// 
diff --git a/SourceGen/FormatDescriptor.cs b/SourceGen/FormatDescriptor.cs
index f7b6d63..6d5f1a2 100644
--- a/SourceGen/FormatDescriptor.cs
+++ b/SourceGen/FormatDescriptor.cs
@@ -66,6 +66,7 @@ namespace SourceGen {
         /// </summary>
         public enum SubType : byte {
             None = 0,
+            ASCII_GENERIC,      // internal place-holder, used when loading older projects
 
             // NumericLE/BE; default is "raw", which can have a context-specific display format
             Hex,
@@ -75,8 +76,9 @@ namespace SourceGen {
             Symbol,             // symbolic ref; replace with Expression, someday?
 
             // Strings and NumericLE/BE (single character)
-            Ascii,              // ASCII (with or without the high bit set)
-            C64Petscii,         // C64 PETSCII
+            LowAscii,           // ASCII (high bit clear)
+            HighAscii,          // ASCII (high bit set)
+            C64Petscii,         // C64 PETSCII (lower case $41-5a, upper case $c1-da)
             C64Screen,          // C64 screen code
 
             // Dense; no sub-types
@@ -85,7 +87,8 @@ namespace SourceGen {
             Ignore              // TODO(someday): use this for "don't care" sections
         }
 
-        private const int MAX_NUMERIC_LEN = 4;
+        // Maximum length of a NumericLE/BE item (32-bit value or 4-byte instruction).
+        public const int MAX_NUMERIC_LEN = 4;
 
         // Create some "stock" descriptors.  For simple cases we return one of these
         // instead of allocating a new object.
@@ -99,8 +102,8 @@ namespace SourceGen {
             Type.NumericLE, SubType.Decimal);
         private static FormatDescriptor ONE_BINARY = new FormatDescriptor(1,
             Type.NumericLE, SubType.Binary);
-        private static FormatDescriptor ONE_ASCII = new FormatDescriptor(1,
-            Type.NumericLE, SubType.Ascii);
+        private static FormatDescriptor ONE_LOW_ASCII = new FormatDescriptor(1,
+            Type.NumericLE, SubType.LowAscii);
 
         /// <summary>
         /// Length, in bytes, of the data to be formatted.
@@ -210,8 +213,8 @@ namespace SourceGen {
                             return ONE_DECIMAL;
                         case SubType.Binary:
                             return ONE_BINARY;
-                        case SubType.Ascii:
-                            return ONE_ASCII;
+                        case SubType.LowAscii:
+                            return ONE_LOW_ASCII;
                     }
                 }
             }
@@ -347,9 +350,12 @@ namespace SourceGen {
             if (IsString) {
                 string descr;
                 switch (FormatSubType) {
-                    case SubType.Ascii:
+                    case SubType.LowAscii:
                         descr = "ASCII";
                         break;
+                    case SubType.HighAscii:
+                        descr = "ASCII (high)";
+                        break;
                     case SubType.C64Petscii:
                         descr = "C64 PETSCII";
                         break;
@@ -411,12 +417,14 @@ namespace SourceGen {
                     return "Address";
                 case SubType.Symbol:
                     return "Symbol \"" + SymbolRef.Label + "\"";
-                case SubType.Ascii:
-                    return "ASCII";
+                case SubType.LowAscii:
+                    return "Numeric, ASCII";
+                case SubType.HighAscii:
+                    return "Numeric, ASCII (high)";
                 case SubType.C64Petscii:
-                    return "C64 PETSCII";
+                    return "Numeric, C64 PETSCII";
                 case SubType.C64Screen:
-                    return "C64 Screen";
+                    return "Numeric, C64 Screen";
 
                 default:
                     return "???";
diff --git a/SourceGen/MainController.cs b/SourceGen/MainController.cs
index c39648a..d1e56b1 100644
--- a/SourceGen/MainController.cs
+++ b/SourceGen/MainController.cs
@@ -991,6 +991,8 @@ namespace SourceGen {
                 }
             }
 
+            newProject.SetFileData(fileData, Path.GetFileName(dataPathName), ref report);
+
             // If there were warnings, notify the user and give the a chance to cancel.
             if (report.Count != 0) {
                 ProjectLoadIssues dlg = new ProjectLoadIssues(mMainWin, report.Format(),
@@ -1004,7 +1006,6 @@ namespace SourceGen {
 
             mProject = newProject;
             mProjectPathName = mProject.ProjectPathName = projPathName;
-            mProject.SetFileData(fileData, Path.GetFileName(dataPathName));
             FinishPrep();
         }
 
diff --git a/SourceGen/ProjectFile.cs b/SourceGen/ProjectFile.cs
index 118fdf6..76029a0 100644
--- a/SourceGen/ProjectFile.cs
+++ b/SourceGen/ProjectFile.cs
@@ -107,6 +107,9 @@ namespace SourceGen {
 
         /// <summary>
         /// Reads the specified file and deserializes it into the project.
+        ///
+        /// The deserialized form may include place-holder entries that can't be resolved
+        /// until the data file is available (see the ASCII_GENERIC place-holder sub-type).
         /// </summary>
         /// <param name="pathName">Input path name.</param>
         /// <param name="proj">Project to deserialize into.</param>
@@ -670,10 +673,10 @@ namespace SourceGen {
             FormatDescriptor.Type format;
             FormatDescriptor.SubType subFormat;
 
-            // File version 1 used a different set of enumerated values for defining strings.
-            // Parse it out here.
             if ("String".Equals(sfd.Format)) {
-                subFormat = FormatDescriptor.SubType.Ascii;
+                // File version 1 used a different set of enumerated values for defining strings.
+                // Parse it out here.
+                subFormat = FormatDescriptor.SubType.ASCII_GENERIC;
                 if ("None".Equals(sfd.SubFormat)) {
                     format = FormatDescriptor.Type.StringGeneric;
                 } else if ("Reverse".Equals(sfd.SubFormat)) {
@@ -687,12 +690,8 @@ namespace SourceGen {
                 } else if ("Dci".Equals(sfd.SubFormat)) {
                     format = FormatDescriptor.Type.StringDci;
                 } else if ("DciReverse".Equals(sfd.SubFormat)) {
-                    // No longer supported.  Treating it as a generic string works poorly,
-                    // because the first byte will appear to be (say) high ASCII, but the rest
-                    // of the string will be low ASCII and get output as hex data.  If we
-                    // explicitly differentiated high/low ASCII we could make this work right.
-                    // We could also split the descriptor into two parts.  Nobody ever used
-                    // this but the regression tests, though, so we don't really care.
+                    // No longer supported.  Nobody ever used this but the regression tests,
+                    // though, so there's no reason to handle this nicely.
                     format = FormatDescriptor.Type.Dense;
                     subFormat = FormatDescriptor.SubType.None;
                 } else {
@@ -708,8 +707,15 @@ namespace SourceGen {
             try {
                 format = (FormatDescriptor.Type)Enum.Parse(
                     typeof(FormatDescriptor.Type), sfd.Format);
-                subFormat = (FormatDescriptor.SubType)Enum.Parse(
-                    typeof(FormatDescriptor.SubType), sfd.SubFormat);
+                if ("Ascii".Equals(sfd.SubFormat)) {
+                    // File version 1 used "Ascii" for all character data in numeric operands.
+                    // It applied to both low and high ASCII.
+                    subFormat = FormatDescriptor.SubType.ASCII_GENERIC;
+                } else {
+                    subFormat = (FormatDescriptor.SubType)Enum.Parse(
+                        typeof(FormatDescriptor.SubType), sfd.SubFormat);
+                }
+
             } catch (ArgumentException) {
                 report.Add(FileLoadItem.Type.Warning, Res.Strings.ERR_BAD_FD_FORMAT +
                     ": " + sfd.Format + "/" + sfd.SubFormat);
diff --git a/SourceGen/PseudoOp.cs b/SourceGen/PseudoOp.cs
index 988dde1..87032e6 100644
--- a/SourceGen/PseudoOp.cs
+++ b/SourceGen/PseudoOp.cs
@@ -541,11 +541,13 @@ namespace SourceGen {
                     return formatter.FormatDecimalValue(operandValue);
                 case FormatDescriptor.SubType.Binary:
                     return formatter.FormatBinaryValue(operandValue, hexMinLen * 4);
-                case FormatDescriptor.SubType.Ascii:
+                case FormatDescriptor.SubType.LowAscii:
+                case FormatDescriptor.SubType.HighAscii:
                 case FormatDescriptor.SubType.C64Petscii:
                 case FormatDescriptor.SubType.C64Screen:
                     // TODO(petscii): convert encoding; use a helper function *not* in
                     //   formatter -- pass converted char value in along with operandValue
+                    // TODO: pass in a "make high ASCII" string, e.g. "| 0x80", that fixes char
                     return formatter.FormatAsciiOrHex(operandValue);
                 case FormatDescriptor.SubType.Symbol:
                     if (symbolTable.TryGetValue(dfd.SymbolRef.Label, out Symbol sym)) {
@@ -575,6 +577,7 @@ namespace SourceGen {
                         return formatter.FormatHexValue(operandValue, hexMinLen);
                     }
                 default:
+                    // should not see REMOVE or ASCII_GENERIC here
                     Debug.Assert(false);
                     return "???";
             }
diff --git a/SourceGen/Tests/GenTest.cs b/SourceGen/Tests/GenTest.cs
index 7224ba2..e982a46 100644
--- a/SourceGen/Tests/GenTest.cs
+++ b/SourceGen/Tests/GenTest.cs
@@ -463,7 +463,8 @@ namespace SourceGen.Tests {
                     return null;
                 }
 
-                project.SetFileData(fileData, Path.GetFileName(dataPathName));
+                FileLoadReport unused = new FileLoadReport("test");
+                project.SetFileData(fileData, Path.GetFileName(dataPathName), ref unused);
                 project.ProjectPathName = projectPathName;
                 project.LoadExternalFiles();
             }
diff --git a/SourceGen/WpfGui/EditDataOperand.xaml.cs b/SourceGen/WpfGui/EditDataOperand.xaml.cs
index b368e11..f15019d 100644
--- a/SourceGen/WpfGui/EditDataOperand.xaml.cs
+++ b/SourceGen/WpfGui/EditDataOperand.xaml.cs
@@ -527,7 +527,8 @@ namespace SourceGen.WpfGui {
                             case FormatDescriptor.SubType.Binary:
                                 radioSimpleDataBinary.IsChecked = true;
                                 break;
-                            case FormatDescriptor.SubType.Ascii:
+                            case FormatDescriptor.SubType.LowAscii:
+                            case FormatDescriptor.SubType.HighAscii:
                             case FormatDescriptor.SubType.C64Petscii:
                             case FormatDescriptor.SubType.C64Screen:
                                 // TODO(petscii): update UI
@@ -631,8 +632,8 @@ namespace SourceGen.WpfGui {
                 } else if (radioSimpleDataBinary.IsChecked == true) {
                     subType = FormatDescriptor.SubType.Binary;
                 } else if (radioSimpleDataAscii.IsChecked == true) {
-                    // TODO(petscii): configure subType correctly
-                    subType = FormatDescriptor.SubType.Ascii;
+                    // TODO(petscii): add PETSCII buttons
+                    subType = FormatDescriptor.SubType.ASCII_GENERIC;
                 } else if (radioSimpleDataAddress.IsChecked == true) {
                     subType = FormatDescriptor.SubType.Address;
                 } else if (radioSimpleDataSymbolic.IsChecked == true) {
@@ -681,25 +682,27 @@ namespace SourceGen.WpfGui {
                 type = FormatDescriptor.Type.Dense;
             } else if (radioFill.IsChecked == true) {
                 type = FormatDescriptor.Type.Fill;
-                subType = FormatDescriptor.SubType.Ascii;    // TODO(petscii): set encoding
             } else if (radioStringMixed.IsChecked == true) {
+                // TODO(petscii): encoding format will come from a combo box; that determines
+                //   the subType and the arg to the string-creation functions, which use the
+                //   appropriate char encoding methods to break up the strings
                 type = FormatDescriptor.Type.StringGeneric;
-                subType = FormatDescriptor.SubType.Ascii;
+                subType = FormatDescriptor.SubType.LowAscii;
             } else if (radioStringMixedReverse.IsChecked == true) {
                 type = FormatDescriptor.Type.StringReverse;
-                subType = FormatDescriptor.SubType.Ascii;
+                subType = FormatDescriptor.SubType.LowAscii;
             } else if (radioStringNullTerm.IsChecked == true) {
                 type = FormatDescriptor.Type.StringNullTerm;
-                subType = FormatDescriptor.SubType.Ascii;
+                subType = FormatDescriptor.SubType.LowAscii;
             } else if (radioStringLen8.IsChecked == true) {
                 type = FormatDescriptor.Type.StringL8;
-                subType = FormatDescriptor.SubType.Ascii;
+                subType = FormatDescriptor.SubType.LowAscii;
             } else if (radioStringLen16.IsChecked == true) {
                 type = FormatDescriptor.Type.StringL16;
-                subType = FormatDescriptor.SubType.Ascii;
+                subType = FormatDescriptor.SubType.LowAscii;
             } else if (radioStringDci.IsChecked == true) {
                 type = FormatDescriptor.Type.StringDci;
-                subType = FormatDescriptor.SubType.Ascii;
+                subType = FormatDescriptor.SubType.LowAscii;
             } else {
                 Debug.Assert(false);
                 // default/none
@@ -762,8 +765,8 @@ namespace SourceGen.WpfGui {
             // length.  Either way, we only need to create the descriptor once.  (This is
             // safe because FormatDescriptor instances are immutable.)
             //
-            // Because certain details, like the fill byte and high-vs-low ASCII, are pulled
-            // out of the data stream at format time, we don't have to dig for them now.
+            // The one exception to this is ASCII values for non-string data, because we have
+            // to dig the low vs. high value out of the data itself.
             FormatDescriptor dfd;
             if (subType == FormatDescriptor.SubType.Symbol) {
                 dfd = FormatDescriptor.Create(chunkLength, symbolRef,
@@ -771,8 +774,19 @@ namespace SourceGen.WpfGui {
             } else {
                 dfd = FormatDescriptor.Create(chunkLength, type, subType);
             }
-
             while (low <= high) {
+                if (subType == FormatDescriptor.SubType.ASCII_GENERIC) {
+                    Debug.Assert(dfd.IsNumeric);
+                    int val = RawData.GetWord(mFileData, low, dfd.Length,
+                        type == FormatDescriptor.Type.NumericBE);
+                    FormatDescriptor.SubType actualSubType = (val > 0x7f) ?
+                        FormatDescriptor.SubType.HighAscii : FormatDescriptor.SubType.LowAscii;
+                    if (actualSubType != dfd.FormatSubType) {
+                        // replace the descriptor
+                        dfd = FormatDescriptor.Create(chunkLength, type, actualSubType);
+                    }
+                }
+
                 Results.Add(low, dfd);
                 low += chunkLength;
             }
@@ -833,13 +847,12 @@ namespace SourceGen.WpfGui {
         /// <param name="offset">Offset of first byte.</param>
         /// <param name="length">Length of string.</param>
         /// <param name="subType">String sub-type.</param>
-        private void CreateStringOrByte(int offset, int length,
-                FormatDescriptor.SubType subType) {
+        private void CreateStringOrByte(int offset, int length, FormatDescriptor.SubType subType) {
             Debug.Assert(length > 0);
             if (length == 1) {
-                // single byte, output as single ASCII char rather than 1-byte string
-                // TODO(petscii): low/high?
-                CreateByteFD(offset, FormatDescriptor.SubType.Ascii);
+                // Single byte, output as single char rather than 1-byte string.  We use the
+                // same encoding as the rest of the string.
+                CreateByteFD(offset, subType);
             } else {
                 FormatDescriptor dfd;
                 dfd = FormatDescriptor.Create(length,
diff --git a/SourceGen/WpfGui/EditInstructionOperand.xaml.cs b/SourceGen/WpfGui/EditInstructionOperand.xaml.cs
index 33b2fdd..8249b17 100644
--- a/SourceGen/WpfGui/EditInstructionOperand.xaml.cs
+++ b/SourceGen/WpfGui/EditInstructionOperand.xaml.cs
@@ -332,7 +332,8 @@ namespace SourceGen.WpfGui {
                 case FormatDescriptor.SubType.Binary:
                     preview.Append(mFormatter.FormatBinaryValue(mOperandValue, 8));
                     break;
-                case FormatDescriptor.SubType.Ascii:
+                case FormatDescriptor.SubType.LowAscii:
+                case FormatDescriptor.SubType.HighAscii:
                     // TODO(petscii): encoding
                     preview.Append(mFormatter.FormatAsciiOrHex(mOperandValue));
                     break;
@@ -470,8 +471,9 @@ namespace SourceGen.WpfGui {
                         case FormatDescriptor.SubType.Binary:
                             binaryButton.IsChecked = true;
                             break;
-                        case FormatDescriptor.SubType.Ascii:
-                        // TODO(petscii): encoding
+                        case FormatDescriptor.SubType.LowAscii:
+                        case FormatDescriptor.SubType.HighAscii:
+                            // TODO(petscii): encoding
                             asciiButton.IsChecked = true;
                             break;
                         case FormatDescriptor.SubType.Symbol:
@@ -552,7 +554,11 @@ namespace SourceGen.WpfGui {
                 subType = FormatDescriptor.SubType.Binary;
             } else if (asciiButton.IsChecked == true) {
                 // TODO(petscii): encoding
-                subType = FormatDescriptor.SubType.Ascii;
+                if (mOperandValue > 0x7f) {
+                    subType = FormatDescriptor.SubType.HighAscii;
+                } else {
+                    subType = FormatDescriptor.SubType.LowAscii;
+                }
             } else if (symbolButton.IsChecked == true) {
                 subType = FormatDescriptor.SubType.Symbol;
             } else {