diff --git a/SourceGen/AsmGen/AsmAcme.cs b/SourceGen/AsmGen/AsmAcme.cs index 6f08de9..a226af7 100644 --- a/SourceGen/AsmGen/AsmAcme.cs +++ b/SourceGen/AsmGen/AsmAcme.cs @@ -540,7 +540,11 @@ namespace SourceGen.AsmGen { Debug.Assert(dfd.IsString); Debug.Assert(dfd.Length > 0); - bool highAscii = false; + if (dfd.FormatSubType == FormatDescriptor.SubType.HighAscii) { + OutputNoJoy(offset, dfd.Length, labelStr, commentStr); + return; + } + int leadingBytes = 0; switch (dfd.FormatType) { @@ -548,18 +552,11 @@ namespace SourceGen.AsmGen { case FormatDescriptor.Type.StringReverse: case FormatDescriptor.Type.StringNullTerm: case FormatDescriptor.Type.StringDci: - highAscii = (data[offset] & 0x80) != 0; break; case FormatDescriptor.Type.StringL8: - if (dfd.Length > 1) { - highAscii = (data[offset + 1] & 0x80) != 0; - } leadingBytes = 1; break; case FormatDescriptor.Type.StringL16: - if (dfd.Length > 2) { - highAscii = (data[offset + 2] & 0x80) != 0; - } leadingBytes = 2; break; default: @@ -567,11 +564,6 @@ namespace SourceGen.AsmGen { return; } - if (highAscii) { - OutputNoJoy(offset, dfd.Length, labelStr, commentStr); - return; - } - StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, '"', StringOpFormatter.RawOutputStyle.CommaSep, MAX_OPERAND_LEN, CharEncoding.ConvertLowAscii); diff --git a/SourceGen/AsmGen/AsmCc65.cs b/SourceGen/AsmGen/AsmCc65.cs index 1a4164b..5677333 100644 --- a/SourceGen/AsmGen/AsmCc65.cs +++ b/SourceGen/AsmGen/AsmCc65.cs @@ -595,7 +595,6 @@ namespace SourceGen.AsmGen { Debug.Assert(dfd.IsString); Debug.Assert(dfd.Length > 0); - bool highAscii = false; int leadingBytes = 0; int trailingBytes = 0; @@ -603,22 +602,14 @@ namespace SourceGen.AsmGen { case FormatDescriptor.Type.StringGeneric: case FormatDescriptor.Type.StringReverse: case FormatDescriptor.Type.StringDci: - highAscii = (data[offset] & 0x80) != 0; break; case FormatDescriptor.Type.StringNullTerm: - highAscii = (data[offset] & 0x80) != 0; trailingBytes = 1; break; case 
FormatDescriptor.Type.StringL8: - if (dfd.Length > 1) { - highAscii = (data[offset + 1] & 0x80) != 0; - } leadingBytes = 1; break; case FormatDescriptor.Type.StringL16: - if (dfd.Length > 2) { - highAscii = (data[offset + 2] & 0x80) != 0; - } leadingBytes = 2; break; default: @@ -626,6 +617,7 @@ namespace SourceGen.AsmGen { return; } + bool highAscii = (dfd.FormatSubType == FormatDescriptor.SubType.HighAscii); if (highAscii && dfd.FormatType != FormatDescriptor.Type.StringGeneric) { OutputNoJoy(offset, dfd.Length, labelStr, commentStr); return; @@ -639,8 +631,7 @@ namespace SourceGen.AsmGen { } StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, '"', - StringOpFormatter.RawOutputStyle.CommaSep, MAX_OPERAND_LEN, - charConv); + StringOpFormatter.RawOutputStyle.CommaSep, MAX_OPERAND_LEN, charConv); stropf.FeedBytes(data, offset, dfd.Length - trailingBytes, leadingBytes, false); string opcodeStr = formatter.FormatPseudoOp(sDataOpNames.StrGeneric); diff --git a/SourceGen/AsmGen/AsmMerlin32.cs b/SourceGen/AsmGen/AsmMerlin32.cs index e41666f..2478bb1 100644 --- a/SourceGen/AsmGen/AsmMerlin32.cs +++ b/SourceGen/AsmGen/AsmMerlin32.cs @@ -479,7 +479,6 @@ namespace SourceGen.AsmGen { Debug.Assert(dfd.IsString); Debug.Assert(dfd.Length > 0); - bool highAscii = false; bool reverse = false; int leadingBytes = 0; string opcodeStr; @@ -487,16 +486,13 @@ namespace SourceGen.AsmGen { switch (dfd.FormatType) { case FormatDescriptor.Type.StringGeneric: opcodeStr = sDataOpNames.StrGeneric; - highAscii = (data[offset] & 0x80) != 0; break; case FormatDescriptor.Type.StringReverse: opcodeStr = sDataOpNames.StrReverse; - highAscii = (data[offset] & 0x80) != 0; reverse = true; break; case FormatDescriptor.Type.StringNullTerm: opcodeStr = sDataOpNames.StrGeneric; // no pseudo-op for this - highAscii = (data[offset] & 0x80) != 0; if (dfd.Length == 1) { // Empty string. Just output the length byte(s) or null terminator. 
GenerateShortSequence(offset, 1, out string opcode, out string operand); @@ -506,37 +502,29 @@ namespace SourceGen.AsmGen { break; case FormatDescriptor.Type.StringL8: opcodeStr = sDataOpNames.StrLen8; - if (dfd.Length > 1) { - highAscii = (data[offset + 1] & 0x80) != 0; - } leadingBytes = 1; break; case FormatDescriptor.Type.StringL16: opcodeStr = sDataOpNames.StrLen16; - if (dfd.Length > 2) { - highAscii = (data[offset + 2] & 0x80) != 0; - } leadingBytes = 2; break; case FormatDescriptor.Type.StringDci: opcodeStr = sDataOpNames.StrDci; - highAscii = (data[offset] & 0x80) != 0; break; default: Debug.Assert(false); return; } - // Merlin 32 uses single-quote for low ASCII, double-quote for high ASCII. When - // quoting the delimiter we use a hexadecimal value. We need to bear in mind that - // we're forcing the characters to low ASCII, but the actual character being - // escaped might be in high ASCII. Hence delim vs. delimReplace. - char delim = highAscii ? '"' : '\''; + // Merlin 32 uses single-quote for low ASCII, double-quote for high ASCII. 
CharEncoding.Convert charConv; - if (highAscii) { + char delim; + if (dfd.FormatSubType == FormatDescriptor.SubType.HighAscii) { charConv = CharEncoding.ConvertHighAscii; + delim = '"'; } else { charConv = CharEncoding.ConvertLowAscii; + delim = '\''; } StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, delim, diff --git a/SourceGen/AsmGen/AsmTass64.cs b/SourceGen/AsmGen/AsmTass64.cs index 10bde47..7d0bb0a 100644 --- a/SourceGen/AsmGen/AsmTass64.cs +++ b/SourceGen/AsmGen/AsmTass64.cs @@ -532,51 +532,41 @@ namespace SourceGen.AsmGen { Debug.Assert(dfd.IsString); Debug.Assert(dfd.Length > 0); - bool highAscii = false; int hiddenLeadingBytes = 0; int shownLeadingBytes = 0; int trailingBytes = 0; string opcodeStr; + if (dfd.FormatSubType == FormatDescriptor.SubType.HighAscii) { + OutputNoJoy(offset, dfd.Length, labelStr, commentStr); + return; + } + switch (dfd.FormatType) { case FormatDescriptor.Type.StringGeneric: case FormatDescriptor.Type.StringReverse: opcodeStr = sDataOpNames.StrGeneric; - highAscii = (data[offset] & 0x80) != 0; break; case FormatDescriptor.Type.StringNullTerm: opcodeStr = sDataOpNames.StrNullTerm; - highAscii = (data[offset] & 0x80) != 0; trailingBytes = 1; break; case FormatDescriptor.Type.StringL8: opcodeStr = sDataOpNames.StrLen8; - if (dfd.Length > 1) { - highAscii = (data[offset + 1] & 0x80) != 0; - } hiddenLeadingBytes = 1; break; case FormatDescriptor.Type.StringL16: opcodeStr = sDataOpNames.StrGeneric; - if (dfd.Length > 2) { - highAscii = (data[offset + 2] & 0x80) != 0; - } shownLeadingBytes = 2; break; case FormatDescriptor.Type.StringDci: opcodeStr = sDataOpNames.StrDci; - highAscii = (data[offset] & 0x80) != 0; break; default: Debug.Assert(false); return; } - if (highAscii) { - OutputNoJoy(offset, dfd.Length, labelStr, commentStr); - return; - } - StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, '"', StringOpFormatter.RawOutputStyle.CommaSep, MAX_OPERAND_LEN, CharEncoding.ConvertLowAscii); diff --git 
a/SourceGen/DataAnalysis.cs b/SourceGen/DataAnalysis.cs index 617169b..a5fb9ee 100644 --- a/SourceGen/DataAnalysis.cs +++ b/SourceGen/DataAnalysis.cs @@ -736,8 +736,10 @@ namespace SourceGen { int asciiLen = RecognizeAscii(mFileData, start, end); if (asciiLen >= minStringChars) { LogV(start, "ASCII string, len=" + asciiLen + " bytes"); + bool isHigh = (mFileData[start] & 0x80) != 0; mAnattribs[start].DataDescriptor = FormatDescriptor.Create(asciiLen, - FormatDescriptor.Type.StringGeneric, FormatDescriptor.SubType.Ascii); + FormatDescriptor.Type.StringGeneric, isHigh ? + FormatDescriptor.SubType.HighAscii : FormatDescriptor.SubType.LowAscii); start += asciiLen; continue; } diff --git a/SourceGen/DisasmProject.cs b/SourceGen/DisasmProject.cs index b505d86..ee6b542 100644 --- a/SourceGen/DisasmProject.cs +++ b/SourceGen/DisasmProject.cs @@ -308,12 +308,15 @@ namespace SourceGen { /// /// 65xx data file contents. /// Data file's filename (not pathname). - public void SetFileData(byte[] fileData, string dataFileName) { + /// Reporting object for validation errors. + public void SetFileData(byte[] fileData, string dataFileName, ref FileLoadReport report) { Debug.Assert(fileData.Length == FileDataLength); Debug.Assert(CRC32.OnWholeBuffer(0, fileData) == FileDataCrc32); mFileData = fileData; mDataFileName = dataFileName; + FixAndValidate(ref report); + #if false ScanFileData(); #endif @@ -396,6 +399,98 @@ namespace SourceGen { } #endif + /// + /// Walks the list of format descriptors, fixing places where the data doesn't match. + /// + private void FixAndValidate(ref FileLoadReport report) { + Dictionary changes = new Dictionary(); + + foreach (KeyValuePair kvp in OperandFormats) { + FormatDescriptor dfd = kvp.Value; + + // v1 project files specified string layouts as sub-types, and assumed they + // were high or low ASCII. Numeric values could use the ASCII sub-type, which + // included both high and low. 
+ // + // v2 project files changed this to make string layouts types, with the + // character encoding specified in the sub-type. High and low ASCII became + // separate, explicitly specified items. + // + // When loading a v1 file, the old "Ascii" sub-type is deserialized to + // ASCII_GENERIC. Now that we have access to the file data, we need to refine + // the sub-type to high or low. + if (dfd.FormatSubType == FormatDescriptor.SubType.ASCII_GENERIC) { + FormatDescriptor newDfd; + if (dfd.IsString) { + // Determine the string encoding by looking at the first character. + // For some strings (StringL8, StringL16) we need to skip forward a + // byte or two. Empty strings with lengths or null-termination will + // be treated as low ASCII. + int checkOffset = kvp.Key; + if (dfd.FormatType == FormatDescriptor.Type.StringL8 && dfd.Length > 1) { + checkOffset++; + } else if (dfd.FormatType == FormatDescriptor.Type.StringL16 && dfd.Length > 2) { + checkOffset += 2; + } + bool isHigh = (FileData[checkOffset] & 0x80) != 0; + newDfd = FormatDescriptor.Create(dfd.Length, dfd.FormatType, + isHigh ? FormatDescriptor.SubType.HighAscii : + FormatDescriptor.SubType.LowAscii); + } else if (dfd.IsNumeric) { + // This is a character constant in an instruction or data operand, such + // as ".dd1 'f'" or "LDA #'f'". Could be multi-byte (even instructions + // can be 16-bit). This is a little awkward, because at this point we + // can't tell the difference between instructions and data. + // + // However, we do know that instructions are always little-endian, that + // opcodes are one byte, that data values > $ff can't be ASCII encoded, + // and that $00 isn't a valid ASCII character. 
So we can apply the + // following test: + // - if the length is 1, it's data; grab the first byte + // - if it's NumericBE, it's data; grab the last byte + // - if the second byte is $00, it's data; grab the first byte + // - otherwise, it's an instruction; grab the second byte + int checkOffset; + if (dfd.FormatType == FormatDescriptor.Type.NumericBE) { + Debug.Assert(dfd.Length <= FormatDescriptor.MAX_NUMERIC_LEN); + checkOffset = kvp.Key + dfd.Length - 1; + } else if (dfd.Length < 2 || FileData[kvp.Key + 1] == 0x00) { + checkOffset = kvp.Key; + } else { + Debug.Assert(dfd.FormatType == FormatDescriptor.Type.NumericLE); + checkOffset = kvp.Key + 1; + } + bool isHigh = (FileData[checkOffset] & 0x80) != 0; + newDfd = FormatDescriptor.Create(dfd.Length, dfd.FormatType, + isHigh ? FormatDescriptor.SubType.HighAscii : + FormatDescriptor.SubType.LowAscii); + } else { + Debug.Assert(false); + newDfd = dfd; + } + changes[kvp.Key] = newDfd; + Debug.WriteLine("Fix +" + kvp.Key.ToString("x6") + ": " + + dfd + " -> " + newDfd); + } + } + + // apply changes to main list + foreach (KeyValuePair kvp in changes) { + OperandFormats[kvp.Key] = kvp.Value; + //report.Add(FileLoadItem.Type.Notice, + // "Fixed format at +" + kvp.Key.ToString("x6")); + } + + // TODO: validate strings + // - null-terminated strings must not have 0x00 bytes, except for the last byte, + // which must be 0x00 + // - the length stored in L8/L16 strings must match the format descriptor length + // - DCI strings must have the appropriate pattern for the high bit + // + // Note it is not required that string data match the encoding, since you're allowed + // to have random gunk mixed in. It just can't violate the above rules. + } + /// /// Loads platform symbol files and extension scripts. 
/// diff --git a/SourceGen/FormatDescriptor.cs b/SourceGen/FormatDescriptor.cs index f7b6d63..6d5f1a2 100644 --- a/SourceGen/FormatDescriptor.cs +++ b/SourceGen/FormatDescriptor.cs @@ -66,6 +66,7 @@ namespace SourceGen { /// public enum SubType : byte { None = 0, + ASCII_GENERIC, // internal place-holder, used when loading older projects // NumericLE/BE; default is "raw", which can have a context-specific display format Hex, @@ -75,8 +76,9 @@ namespace SourceGen { Symbol, // symbolic ref; replace with Expression, someday? // Strings and NumericLE/BE (single character) - Ascii, // ASCII (with or without the high bit set) - C64Petscii, // C64 PETSCII + LowAscii, // ASCII (high bit clear) + HighAscii, // ASCII (high bit set) + C64Petscii, // C64 PETSCII (lower case $41-5a, upper case $c1-da) C64Screen, // C64 screen code // Dense; no sub-types @@ -85,7 +87,8 @@ namespace SourceGen { Ignore // TODO(someday): use this for "don't care" sections } - private const int MAX_NUMERIC_LEN = 4; + // Maximum length of a NumericLE/BE item (32-bit value or 4-byte instruction). + public const int MAX_NUMERIC_LEN = 4; // Create some "stock" descriptors. For simple cases we return one of these // instead of allocating a new object. @@ -99,8 +102,8 @@ namespace SourceGen { Type.NumericLE, SubType.Decimal); private static FormatDescriptor ONE_BINARY = new FormatDescriptor(1, Type.NumericLE, SubType.Binary); - private static FormatDescriptor ONE_ASCII = new FormatDescriptor(1, - Type.NumericLE, SubType.Ascii); + private static FormatDescriptor ONE_LOW_ASCII = new FormatDescriptor(1, + Type.NumericLE, SubType.LowAscii); /// /// Length, in bytes, of the data to be formatted. 
@@ -210,8 +213,8 @@ namespace SourceGen { return ONE_DECIMAL; case SubType.Binary: return ONE_BINARY; - case SubType.Ascii: - return ONE_ASCII; + case SubType.LowAscii: + return ONE_LOW_ASCII; } } } @@ -347,9 +350,12 @@ namespace SourceGen { if (IsString) { string descr; switch (FormatSubType) { - case SubType.Ascii: + case SubType.LowAscii: descr = "ASCII"; break; + case SubType.HighAscii: + descr = "ASCII (high)"; + break; case SubType.C64Petscii: descr = "C64 PETSCII"; break; @@ -411,12 +417,14 @@ namespace SourceGen { return "Address"; case SubType.Symbol: return "Symbol \"" + SymbolRef.Label + "\""; - case SubType.Ascii: - return "ASCII"; + case SubType.LowAscii: + return "Numeric, ASCII"; + case SubType.HighAscii: + return "Numeric, ASCII (high)"; case SubType.C64Petscii: - return "C64 PETSCII"; + return "Numeric, C64 PETSCII"; case SubType.C64Screen: - return "C64 Screen"; + return "Numeric, C64 Screen"; default: return "???"; diff --git a/SourceGen/MainController.cs b/SourceGen/MainController.cs index c39648a..d1e56b1 100644 --- a/SourceGen/MainController.cs +++ b/SourceGen/MainController.cs @@ -991,6 +991,8 @@ namespace SourceGen { } } + newProject.SetFileData(fileData, Path.GetFileName(dataPathName), ref report); + // If there were warnings, notify the user and give the a chance to cancel. if (report.Count != 0) { ProjectLoadIssues dlg = new ProjectLoadIssues(mMainWin, report.Format(), @@ -1004,7 +1006,6 @@ namespace SourceGen { mProject = newProject; mProjectPathName = mProject.ProjectPathName = projPathName; - mProject.SetFileData(fileData, Path.GetFileName(dataPathName)); FinishPrep(); } diff --git a/SourceGen/ProjectFile.cs b/SourceGen/ProjectFile.cs index 118fdf6..76029a0 100644 --- a/SourceGen/ProjectFile.cs +++ b/SourceGen/ProjectFile.cs @@ -107,6 +107,9 @@ namespace SourceGen { /// /// Reads the specified file and deserializes it into the project. 
+ /// + /// The deserialized form may include place-holder entries that can't be resolved + /// until the data file is available (see the ASCII_GENERIC string sub-type). /// /// Input path name. /// Project to deserialize into. @@ -670,10 +673,10 @@ namespace SourceGen { FormatDescriptor.Type format; FormatDescriptor.SubType subFormat; - // File version 1 used a different set of enumerated values for defining strings. - // Parse it out here. if ("String".Equals(sfd.Format)) { - subFormat = FormatDescriptor.SubType.Ascii; + // File version 1 used a different set of enumerated values for defining strings. + // Parse it out here. + subFormat = FormatDescriptor.SubType.ASCII_GENERIC; if ("None".Equals(sfd.SubFormat)) { format = FormatDescriptor.Type.StringGeneric; } else if ("Reverse".Equals(sfd.SubFormat)) { @@ -687,12 +690,8 @@ namespace SourceGen { } else if ("Dci".Equals(sfd.SubFormat)) { format = FormatDescriptor.Type.StringDci; } else if ("DciReverse".Equals(sfd.SubFormat)) { - // No longer supported. Treating it as a generic string works poorly, - // because the first byte will appear to be (say) high ASCII, but the rest - // of the string will be low ASCII and get output as hex data. If we - // explicitly differentiated high/low ASCII we could make this work right. - // We could also split the descriptor into two parts. Nobody ever used - // this but the regression tests, though, so we don't really care. + // No longer supported. Nobody ever used this but the regression tests, + // though, so there's no reason to handle this nicely. 
format = FormatDescriptor.Type.Dense; subFormat = FormatDescriptor.SubType.None; } else { @@ -708,8 +707,15 @@ namespace SourceGen { try { format = (FormatDescriptor.Type)Enum.Parse( typeof(FormatDescriptor.Type), sfd.Format); - subFormat = (FormatDescriptor.SubType)Enum.Parse( - typeof(FormatDescriptor.SubType), sfd.SubFormat); + if ("Ascii".Equals(sfd.SubFormat)) { + // File version 1 used "Ascii" for all character data in numeric operands. + // It applied to both low and high ASCII. + subFormat = FormatDescriptor.SubType.ASCII_GENERIC; + } else { + subFormat = (FormatDescriptor.SubType)Enum.Parse( + typeof(FormatDescriptor.SubType), sfd.SubFormat); + } + } catch (ArgumentException) { report.Add(FileLoadItem.Type.Warning, Res.Strings.ERR_BAD_FD_FORMAT + ": " + sfd.Format + "/" + sfd.SubFormat); diff --git a/SourceGen/PseudoOp.cs b/SourceGen/PseudoOp.cs index 988dde1..87032e6 100644 --- a/SourceGen/PseudoOp.cs +++ b/SourceGen/PseudoOp.cs @@ -541,11 +541,13 @@ namespace SourceGen { return formatter.FormatDecimalValue(operandValue); case FormatDescriptor.SubType.Binary: return formatter.FormatBinaryValue(operandValue, hexMinLen * 4); - case FormatDescriptor.SubType.Ascii: + case FormatDescriptor.SubType.LowAscii: + case FormatDescriptor.SubType.HighAscii: case FormatDescriptor.SubType.C64Petscii: case FormatDescriptor.SubType.C64Screen: // TODO(petscii): convert encoding; use a helper function *not* in // formatter -- pass converted char value in along with operandValue + // TODO: pass in a "make high ASCII" string, e.g. 
"| 0x80", that fixes char return formatter.FormatAsciiOrHex(operandValue); case FormatDescriptor.SubType.Symbol: if (symbolTable.TryGetValue(dfd.SymbolRef.Label, out Symbol sym)) { @@ -575,6 +577,7 @@ namespace SourceGen { return formatter.FormatHexValue(operandValue, hexMinLen); } default: + // should not see REMOVE or ASCII_GENERIC here Debug.Assert(false); return "???"; } diff --git a/SourceGen/Tests/GenTest.cs b/SourceGen/Tests/GenTest.cs index 7224ba2..e982a46 100644 --- a/SourceGen/Tests/GenTest.cs +++ b/SourceGen/Tests/GenTest.cs @@ -463,7 +463,8 @@ namespace SourceGen.Tests { return null; } - project.SetFileData(fileData, Path.GetFileName(dataPathName)); + FileLoadReport unused = new FileLoadReport("test"); + project.SetFileData(fileData, Path.GetFileName(dataPathName), ref unused); project.ProjectPathName = projectPathName; project.LoadExternalFiles(); } diff --git a/SourceGen/WpfGui/EditDataOperand.xaml.cs b/SourceGen/WpfGui/EditDataOperand.xaml.cs index b368e11..f15019d 100644 --- a/SourceGen/WpfGui/EditDataOperand.xaml.cs +++ b/SourceGen/WpfGui/EditDataOperand.xaml.cs @@ -527,7 +527,8 @@ namespace SourceGen.WpfGui { case FormatDescriptor.SubType.Binary: radioSimpleDataBinary.IsChecked = true; break; - case FormatDescriptor.SubType.Ascii: + case FormatDescriptor.SubType.LowAscii: + case FormatDescriptor.SubType.HighAscii: case FormatDescriptor.SubType.C64Petscii: case FormatDescriptor.SubType.C64Screen: // TODO(petscii): update UI @@ -631,8 +632,8 @@ namespace SourceGen.WpfGui { } else if (radioSimpleDataBinary.IsChecked == true) { subType = FormatDescriptor.SubType.Binary; } else if (radioSimpleDataAscii.IsChecked == true) { - // TODO(petscii): configure subType correctly - subType = FormatDescriptor.SubType.Ascii; + // TODO(petscii): add PETSCII buttons + subType = FormatDescriptor.SubType.ASCII_GENERIC; } else if (radioSimpleDataAddress.IsChecked == true) { subType = FormatDescriptor.SubType.Address; } else if (radioSimpleDataSymbolic.IsChecked == 
true) { @@ -681,25 +682,27 @@ namespace SourceGen.WpfGui { type = FormatDescriptor.Type.Dense; } else if (radioFill.IsChecked == true) { type = FormatDescriptor.Type.Fill; - subType = FormatDescriptor.SubType.Ascii; // TODO(petscii): set encoding } else if (radioStringMixed.IsChecked == true) { + // TODO(petscii): encoding format will come from a combo box; that determines + // the subType and the arg to the string-creation functions, which use the + // appropriate char encoding methods to break up the strings type = FormatDescriptor.Type.StringGeneric; - subType = FormatDescriptor.SubType.Ascii; + subType = FormatDescriptor.SubType.LowAscii; } else if (radioStringMixedReverse.IsChecked == true) { type = FormatDescriptor.Type.StringReverse; - subType = FormatDescriptor.SubType.Ascii; + subType = FormatDescriptor.SubType.LowAscii; } else if (radioStringNullTerm.IsChecked == true) { type = FormatDescriptor.Type.StringNullTerm; - subType = FormatDescriptor.SubType.Ascii; + subType = FormatDescriptor.SubType.LowAscii; } else if (radioStringLen8.IsChecked == true) { type = FormatDescriptor.Type.StringL8; - subType = FormatDescriptor.SubType.Ascii; + subType = FormatDescriptor.SubType.LowAscii; } else if (radioStringLen16.IsChecked == true) { type = FormatDescriptor.Type.StringL16; - subType = FormatDescriptor.SubType.Ascii; + subType = FormatDescriptor.SubType.LowAscii; } else if (radioStringDci.IsChecked == true) { type = FormatDescriptor.Type.StringDci; - subType = FormatDescriptor.SubType.Ascii; + subType = FormatDescriptor.SubType.LowAscii; } else { Debug.Assert(false); // default/none @@ -762,8 +765,8 @@ namespace SourceGen.WpfGui { // length. Either way, we only need to create the descriptor once. (This is // safe because FormatDescriptor instances are immutable.) // - // Because certain details, like the fill byte and high-vs-low ASCII, are pulled - // out of the data stream at format time, we don't have to dig for them now. 
+ // The one exception to this is ASCII values for non-string data, because we have + // to dig the low vs. high value out of the data itself. FormatDescriptor dfd; if (subType == FormatDescriptor.SubType.Symbol) { dfd = FormatDescriptor.Create(chunkLength, symbolRef, @@ -771,8 +774,19 @@ namespace SourceGen.WpfGui { } else { dfd = FormatDescriptor.Create(chunkLength, type, subType); } - while (low <= high) { + if (subType == FormatDescriptor.SubType.ASCII_GENERIC) { + Debug.Assert(dfd.IsNumeric); + int val = RawData.GetWord(mFileData, low, dfd.Length, + type == FormatDescriptor.Type.NumericBE); + FormatDescriptor.SubType actualSubType = (val > 0x7f) ? + FormatDescriptor.SubType.HighAscii : FormatDescriptor.SubType.LowAscii; + if (actualSubType != dfd.FormatSubType) { + // replace the descriptor + dfd = FormatDescriptor.Create(chunkLength, type, actualSubType); + } + } + Results.Add(low, dfd); low += chunkLength; } @@ -833,13 +847,12 @@ namespace SourceGen.WpfGui { /// Offset of first byte. /// Length of string. /// String sub-type. - private void CreateStringOrByte(int offset, int length, - FormatDescriptor.SubType subType) { + private void CreateStringOrByte(int offset, int length, FormatDescriptor.SubType subType) { Debug.Assert(length > 0); if (length == 1) { - // single byte, output as single ASCII char rather than 1-byte string - // TODO(petscii): low/high? - CreateByteFD(offset, FormatDescriptor.SubType.Ascii); + // Single byte, output as single char rather than 1-byte string. We use the + // same encoding as the rest of the string. 
+ CreateByteFD(offset, subType); } else { FormatDescriptor dfd; dfd = FormatDescriptor.Create(length, diff --git a/SourceGen/WpfGui/EditInstructionOperand.xaml.cs b/SourceGen/WpfGui/EditInstructionOperand.xaml.cs index 33b2fdd..8249b17 100644 --- a/SourceGen/WpfGui/EditInstructionOperand.xaml.cs +++ b/SourceGen/WpfGui/EditInstructionOperand.xaml.cs @@ -332,7 +332,8 @@ namespace SourceGen.WpfGui { case FormatDescriptor.SubType.Binary: preview.Append(mFormatter.FormatBinaryValue(mOperandValue, 8)); break; - case FormatDescriptor.SubType.Ascii: + case FormatDescriptor.SubType.LowAscii: + case FormatDescriptor.SubType.HighAscii: // TODO(petscii): encoding preview.Append(mFormatter.FormatAsciiOrHex(mOperandValue)); break; @@ -470,8 +471,9 @@ namespace SourceGen.WpfGui { case FormatDescriptor.SubType.Binary: binaryButton.IsChecked = true; break; - case FormatDescriptor.SubType.Ascii: - // TODO(petscii): encoding + case FormatDescriptor.SubType.LowAscii: + case FormatDescriptor.SubType.HighAscii: + // TODO(petscii): encoding asciiButton.IsChecked = true; break; case FormatDescriptor.SubType.Symbol: @@ -552,7 +554,11 @@ namespace SourceGen.WpfGui { subType = FormatDescriptor.SubType.Binary; } else if (asciiButton.IsChecked == true) { // TODO(petscii): encoding - subType = FormatDescriptor.SubType.Ascii; + if (mOperandValue > 0x7f) { + subType = FormatDescriptor.SubType.HighAscii; + } else { + subType = FormatDescriptor.SubType.LowAscii; + } } else if (symbolButton.IsChecked == true) { subType = FormatDescriptor.SubType.Symbol; } else {