diff --git a/SourceGen/CodeAnalysis.cs b/SourceGen/CodeAnalysis.cs index 9315b30..7129a4d 100644 --- a/SourceGen/CodeAnalysis.cs +++ b/SourceGen/CodeAnalysis.cs @@ -1086,7 +1086,9 @@ namespace SourceGen { } if (isStringType) { - if (!VerifyStringData(offset, length, fmt)) { + if (!DataAnalysis.VerifyStringData(mFileData, offset, length, fmt, + out string failMsg)) { + LogW(offset, failMsg); return false; } } else if (type == DataType.Fill) { @@ -1111,67 +1113,6 @@ namespace SourceGen { return true; } - /// - /// Verifies that the string data is what is expected. Does not attempt to check - /// the character encoding, just the structure. - /// - /// True if all is well. - private bool VerifyStringData(int offset, int length, FormatDescriptor.Type type) { - switch (type) { - case FormatDescriptor.Type.StringGeneric: - case FormatDescriptor.Type.StringReverse: - return true; - case FormatDescriptor.Type.StringNullTerm: - // must end in null byte, and have no null bytes before the end - int chk = offset; - while (length-- != 0) { - byte val = mFileData[chk++]; - if (val == 0x00) { - if (length != 0) { - LogW(offset, "found null in middle of null-term string"); - return false; - } else { - return true; - } - } - } - LogW(offset, "no null at end of null-term string"); - return false; - case FormatDescriptor.Type.StringL8: - if (mFileData[offset] != length - 1) { - LogW(offset, "L1 string with mismatched length"); - return false; - } - return true; - case FormatDescriptor.Type.StringL16: - int len = RawData.GetWord(mFileData, offset, 2, false); - if (len != length - 2) { - LogW(offset, "L2 string with mismatched length"); - return false; - } - return true; - case FormatDescriptor.Type.StringDci: - if (length < 2) { - LogW(offset, "DCI string is too short"); - return false; - } - byte first = (byte) (mFileData[offset] & 0x80); - for (int i = offset + 1; i < offset + length - 1; i++) { - if ((mFileData[i] & 0x80) != first) { - LogW(offset, "mixed DCI string"); - return false; - } - } - if ((mFileData[offset + length - 1] & 0x80) == first) { - LogW(offset, "DCI string did not end"); - return false; - } - return true; - default: - Debug.Assert(false); - return false; - } - } private bool VerifyFillData(int offset, int length) { byte first = mFileData[offset]; diff --git a/SourceGen/DataAnalysis.cs b/SourceGen/DataAnalysis.cs index 174edd3..6c014d4 100644 --- a/SourceGen/DataAnalysis.cs +++ b/SourceGen/DataAnalysis.cs @@ -1144,6 +1144,77 @@ namespace SourceGen { return stringCount; } + /// + /// Verifies that the string data is what is expected. Does not attempt to check + /// the character encoding, just the structure. + /// + /// Raw data. + /// Start offset of string. + /// Length of string, including leading length and terminating + /// null bytes. + /// Expected string type. + /// Detailed failure message. + /// True if all is well. + public static bool VerifyStringData(byte[] fileData, int offset, int length, + FormatDescriptor.Type type, out string failMsg) { + failMsg = string.Empty; + + switch (type) { + case FormatDescriptor.Type.StringGeneric: + case FormatDescriptor.Type.StringReverse: + return true; + case FormatDescriptor.Type.StringNullTerm: + // must end in null byte, and have no null bytes before the end + int chk = offset; + while (length-- != 0) { + byte val = fileData[chk++]; + if (val == 0x00) { + if (length != 0) { + failMsg = Res.Strings.STR_VFY_NULL_INSIDE_NULL_TERM; + return false; + } else { + return true; + } + } + } + failMsg = Res.Strings.STR_VFY_MISSING_NULL_TERM; + return false; + case FormatDescriptor.Type.StringL8: + if (fileData[offset] != length - 1) { + failMsg = Res.Strings.STR_VFY_L1_LENGTH_MISMATCH; + return false; + } + return true; + case FormatDescriptor.Type.StringL16: + int len = RawData.GetWord(fileData, offset, 2, false); + if (len != length - 2) { + failMsg = Res.Strings.STR_VFY_L2_LENGTH_MISMATCH; + return false; + } + return true; + case FormatDescriptor.Type.StringDci: + if (length < 2) { + failMsg = Res.Strings.STR_VFY_DCI_SHORT; + return false; + } + byte first = (byte)(fileData[offset] & 0x80); + for (int i = offset + 1; i < offset + length - 1; i++) { + if ((fileData[i] & 0x80) != first) { + failMsg = Res.Strings.STR_VFY_DCI_MIXED_DATA; + return false; + } + } + if ((fileData[offset + length - 1] & 0x80) == first) { + failMsg = Res.Strings.STR_VFY_DCI_NOT_TERMINATED; + return false; + } + return true; + default: + Debug.Assert(false); + return false; + } + } + #endregion // Static analyzers } } diff --git a/SourceGen/DisasmProject.cs b/SourceGen/DisasmProject.cs index abdd658..f2ee49b 100644 --- a/SourceGen/DisasmProject.cs +++ b/SourceGen/DisasmProject.cs @@ -437,6 +437,7 @@ namespace SourceGen { /// Walks the list of format descriptors, fixing places where the data doesn't match. /// private void FixAndValidate(ref FileLoadReport report) { + // Can't modify a list while we're iterating through it, so gather changes here. Dictionary changes = new Dictionary(); foreach (KeyValuePair kvp in OperandFormats) { @@ -505,24 +506,33 @@ namespace SourceGen { changes[kvp.Key] = newDfd; Debug.WriteLine("Fix +" + kvp.Key.ToString("x6") + ": " + dfd + " -> " + newDfd); + // possibly interesting, but rarely; very noisy + //report.Add(FileLoadItem.Type.Notice, + // "Fixed format at +" + kvp.Key.ToString("x6")); } } - // apply changes to main list - foreach (KeyValuePair kvp in changes) { - OperandFormats[kvp.Key] = kvp.Value; - //report.Add(FileLoadItem.Type.Notice, - // "Fixed format at +" + kvp.Key.ToString("x6")); + // Run through the list again, this time looking for badly-formed strings. We're + // only checking structure, not character encoding, because you're allowed to have + // non-printable characters in strings. + foreach (KeyValuePair kvp in OperandFormats) { + FormatDescriptor dfd = kvp.Value; + if (dfd.IsString && !DataAnalysis.VerifyStringData(FileData, kvp.Key, dfd.Length, + dfd.FormatType, out string failMsg)) { + report.Add(FileLoadItem.Type.Warning, + "+" + kvp.Key.ToString("x6") + ": " + failMsg); + changes[kvp.Key] = null; + } } - // TODO: validate strings - // - null-terminated strings must not have 0x00 bytes, except for the last byte, - // which must be 0x00 - // - the length stored in L8/L16 strings much match the format descriptor length - // - DCI strings must have the appropriate pattern for the high bit - // - // Note it is not required that string data match the encoding, since you're allowed - // to have random gunk mixed in. It just can't violate the above rules. + // Apply changes to main list. + foreach (KeyValuePair kvp in changes) { + if (kvp.Value == null) { + OperandFormats.Remove(kvp.Key); + } else { + OperandFormats[kvp.Key] = kvp.Value; + } + } } /// @@ -821,20 +831,28 @@ namespace SourceGen { /// Applies user-defined format descriptors to the Anattribs array. This specifies the /// format for instruction operands, and identifies data items. /// + /// + /// In an ideal world, this would be a trivial function. In practice it's possible for + /// all sorts of weird edge cases to arise, e.g. if you hint something as data, apply + /// formats, and then hint it as code, many strange things are possible. We don't want + /// to delete user data if it seems out of place, but we do want to ignore anything + /// that's going to confuse the source generator later on. + /// + /// Problem reports are written to a log (which is shown by the Analyzer Output + /// window) and the Problems list. Once the latter is better established we can + /// stop sending them to the log. + /// /// Log for debug messages. private void ApplyFormatDescriptors(DebugLog genLog) { genLog.LogI("Applying format descriptors"); + // TODO(someday): move error format strings to string dictionary + foreach (KeyValuePair kvp in OperandFormats) { int offset = kvp.Key; - // If you hint as data, apply formats, and then hint as code, all sorts - // of strange things can happen. We want to ignore anything that doesn't - // appear to be valid. While we're at it, we do some internal consistency - // checks in the name of catching bugs as soon as possible. - // Check offset. - if (offset < 0 || offset >= mAnattribs.Length) { + if (offset < 0 || offset >= mFileData.Length) { string msg = "invalid offset (desc=" + kvp.Value + ")"; genLog.LogE("+" + offset.ToString("x6") + ": " + msg); Problems.Add(new ProblemList.ProblemEntry( @@ -844,13 +862,13 @@ namespace SourceGen { msg, ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored)); Debug.Assert(false); - continue; // ignore this one + continue; } // Make sure it doesn't run off the end - if (offset + kvp.Value.Length > mAnattribs.Length) { + if (offset + kvp.Value.Length > mFileData.Length) { string msg = "invalid offset+len: len=" + kvp.Value.Length + - " file=" + mAnattribs.Length; + " file=" + mFileData.Length; genLog.LogE("+" + offset.ToString("x6") + ": " + msg); Problems.Add(new ProblemList.ProblemEntry( ProblemList.ProblemEntry.SeverityLevel.Error, @@ -859,7 +877,19 @@ namespace SourceGen { msg, ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored)); Debug.Assert(false); - continue; // ignore this one + continue; + } + + if (!AddrMap.IsContiguous(offset, kvp.Value.Length)) { + string msg = "descriptor straddles address change; len=" + kvp.Value.Length; + genLog.LogE("+" + offset.ToString("x6") + ": " + msg); + Problems.Add(new ProblemList.ProblemEntry( + ProblemList.ProblemEntry.SeverityLevel.Error, + offset, + ProblemList.ProblemEntry.ProblemType.InvalidOffsetOrLength, + msg, + ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored)); + continue; } if (mAnattribs[offset].IsInstructionStart) { @@ -876,7 +906,7 @@ namespace SourceGen { ProblemList.ProblemEntry.ProblemType.InvalidOffsetOrLength, msg, ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored)); - continue; // ignore this one + continue; } if (kvp.Value.Length == 1) { // No operand to format! @@ -888,7 +918,7 @@ namespace SourceGen { ProblemList.ProblemEntry.ProblemType.InvalidDescriptor, msg, ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored)); - continue; // ignore this one + continue; } if (!kvp.Value.IsValidForInstruction) { string msg = "descriptor not valid for instruction: " + kvp.Value; @@ -899,7 +929,7 @@ namespace SourceGen { ProblemList.ProblemEntry.ProblemType.InvalidDescriptor, msg, ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored)); - continue; // ignore this one + continue; } } else if (mAnattribs[offset].IsInstruction) { // Mid-instruction format. @@ -911,7 +941,7 @@ namespace SourceGen { ProblemList.ProblemEntry.ProblemType.InvalidDescriptor, msg, ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored)); - continue; // ignore this one + continue; } else { // Data or inline data. The data analyzer hasn't run yet. We want to // confirm that the descriptor doesn't overlap with code. @@ -946,6 +976,7 @@ namespace SourceGen { } } + // All tests passed. Apply the descriptor. mAnattribs[offset].DataDescriptor = kvp.Value; } } diff --git a/SourceGen/Res/Strings.xaml b/SourceGen/Res/Strings.xaml index ce1ac4a..3ff9b4a 100644 --- a/SourceGen/Res/Strings.xaml +++ b/SourceGen/Res/Strings.xaml @@ -135,6 +135,13 @@ limitations under the License. {1} CPU @ {2} MHz Show Ready + DCI string has mixed data + DCI string not terminated + DCI string is too short + length of string doesn't match length byte + length of string doesn't match length word + null-terminated string doesn't end with null byte + found null byte in the middle of null-terminated string Symbol Import Imported {0} global symbols. No global+export symbols were found. diff --git a/SourceGen/Res/Strings.xaml.cs b/SourceGen/Res/Strings.xaml.cs index 9fd40c4..450aab2 100644 --- a/SourceGen/Res/Strings.xaml.cs +++ b/SourceGen/Res/Strings.xaml.cs @@ -251,6 +251,20 @@ namespace SourceGen.Res { (string)Application.Current.FindResource("str_ShowCol"); public static string STATUS_READY = (string)Application.Current.FindResource("str_StatusReady"); + public static string STR_VFY_DCI_MIXED_DATA = + (string)Application.Current.FindResource("str_StrVfyDciMixedData"); + public static string STR_VFY_DCI_NOT_TERMINATED = + (string)Application.Current.FindResource("str_StrVfyDciNotTerminated"); + public static string STR_VFY_DCI_SHORT = + (string)Application.Current.FindResource("str_StrVfyDciShort"); + public static string STR_VFY_L1_LENGTH_MISMATCH = + (string)Application.Current.FindResource("str_StrVfyL1LengthMismatch"); + public static string STR_VFY_L2_LENGTH_MISMATCH = + (string)Application.Current.FindResource("str_StrVfyL2LengthMismatch"); + public static string STR_VFY_MISSING_NULL_TERM = + (string)Application.Current.FindResource("str_StrVfyMissingNullTerm"); + public static string STR_VFY_NULL_INSIDE_NULL_TERM = + (string)Application.Current.FindResource("str_StrVfyNullInsideNullTerm"); public static string SYMBOL_IMPORT_CAPTION = (string)Application.Current.FindResource("str_SymbolImportCaption"); public static string SYMBOL_IMPORT_GOOD_FMT = diff --git a/SourceGen/RuntimeData/Apple/F8-ROM.sym65 b/SourceGen/RuntimeData/Apple/F8-ROM.sym65 index cd31c37..f01843e 100644 --- a/SourceGen/RuntimeData/Apple/F8-ROM.sym65 +++ b/SourceGen/RuntimeData/Apple/F8-ROM.sym65 @@ -60,7 +60,7 @@ MON_PRNTYX @ $F940 ;print Y-reg/X-reg as 4 hex digits MON_PRNTAX @ $F941 ;print Acc/X-reg as 4 hex digits MON_PRNTX @ $F944 ;print X-reg as 2 hex digits MON_PRBLNK @ $F948 ;print 3 spaces -MON_PRBL2 @ $F94A +MON_PRBL2 @ $F94A ;print multiple spaces, count in X-reg MON_PCADJ @ $F953 ;monitor/mini-asm PC adjust MON_TEXT2COPY @ $F962 MON_OLDIRQ @ $FA40 ;autostart ROM IRQ handler