diff --git a/SourceGen/CodeAnalysis.cs b/SourceGen/CodeAnalysis.cs
index 9315b30..7129a4d 100644
--- a/SourceGen/CodeAnalysis.cs
+++ b/SourceGen/CodeAnalysis.cs
@@ -1086,7 +1086,9 @@ namespace SourceGen {
}
if (isStringType) {
- if (!VerifyStringData(offset, length, fmt)) {
+ if (!DataAnalysis.VerifyStringData(mFileData, offset, length, fmt,
+ out string failMsg)) {
+ LogW(offset, failMsg);
return false;
}
} else if (type == DataType.Fill) {
@@ -1111,67 +1113,6 @@ namespace SourceGen {
return true;
}
- ///
- /// Verifies that the string data is what is expected. Does not attempt to check
- /// the character encoding, just the structure.
- ///
- /// True if all is well.
- private bool VerifyStringData(int offset, int length, FormatDescriptor.Type type) {
- switch (type) {
- case FormatDescriptor.Type.StringGeneric:
- case FormatDescriptor.Type.StringReverse:
- return true;
- case FormatDescriptor.Type.StringNullTerm:
- // must end in null byte, and have no null bytes before the end
- int chk = offset;
- while (length-- != 0) {
- byte val = mFileData[chk++];
- if (val == 0x00) {
- if (length != 0) {
- LogW(offset, "found null in middle of null-term string");
- return false;
- } else {
- return true;
- }
- }
- }
- LogW(offset, "no null at end of null-term string");
- return false;
- case FormatDescriptor.Type.StringL8:
- if (mFileData[offset] != length - 1) {
- LogW(offset, "L1 string with mismatched length");
- return false;
- }
- return true;
- case FormatDescriptor.Type.StringL16:
- int len = RawData.GetWord(mFileData, offset, 2, false);
- if (len != length - 2) {
- LogW(offset, "L2 string with mismatched length");
- return false;
- }
- return true;
- case FormatDescriptor.Type.StringDci:
- if (length < 2) {
- LogW(offset, "DCI string is too short");
- return false;
- }
- byte first = (byte) (mFileData[offset] & 0x80);
- for (int i = offset + 1; i < offset + length - 1; i++) {
- if ((mFileData[i] & 0x80) != first) {
- LogW(offset, "mixed DCI string");
- return false;
- }
- }
- if ((mFileData[offset + length - 1] & 0x80) == first) {
- LogW(offset, "DCI string did not end");
- return false;
- }
- return true;
- default:
- Debug.Assert(false);
- return false;
- }
- }
private bool VerifyFillData(int offset, int length) {
byte first = mFileData[offset];
diff --git a/SourceGen/DataAnalysis.cs b/SourceGen/DataAnalysis.cs
index 174edd3..6c014d4 100644
--- a/SourceGen/DataAnalysis.cs
+++ b/SourceGen/DataAnalysis.cs
@@ -1144,6 +1144,77 @@ namespace SourceGen {
return stringCount;
}
+ ///
+ /// Verifies that the string data is what is expected. Does not attempt to check
+ /// the character encoding, just the structure.
+ ///
+ /// Raw data.
+ /// Start offset of string.
+ /// Length of string, including leading length and terminating
+ /// null bytes.
+ /// Expected string type.
+ /// Detailed failure message.
+ /// True if all is well.
+ public static bool VerifyStringData(byte[] fileData, int offset, int length,
+ FormatDescriptor.Type type, out string failMsg) {
+ failMsg = string.Empty;
+
+ switch (type) {
+ case FormatDescriptor.Type.StringGeneric:
+ case FormatDescriptor.Type.StringReverse:
+ return true;
+ case FormatDescriptor.Type.StringNullTerm:
+ // must end in null byte, and have no null bytes before the end
+ int chk = offset;
+ while (length-- != 0) {
+ byte val = fileData[chk++];
+ if (val == 0x00) {
+ if (length != 0) {
+ failMsg = Res.Strings.STR_VFY_NULL_INSIDE_NULL_TERM;
+ return false;
+ } else {
+ return true;
+ }
+ }
+ }
+ failMsg = Res.Strings.STR_VFY_MISSING_NULL_TERM;
+ return false;
+ case FormatDescriptor.Type.StringL8:
+ if (fileData[offset] != length - 1) {
+ failMsg = Res.Strings.STR_VFY_L1_LENGTH_MISMATCH;
+ return false;
+ }
+ return true;
+ case FormatDescriptor.Type.StringL16:
+ int len = RawData.GetWord(fileData, offset, 2, false);
+ if (len != length - 2) {
+ failMsg = Res.Strings.STR_VFY_L2_LENGTH_MISMATCH;
+ return false;
+ }
+ return true;
+ case FormatDescriptor.Type.StringDci:
+ if (length < 2) {
+ failMsg = Res.Strings.STR_VFY_DCI_SHORT;
+ return false;
+ }
+ byte first = (byte)(fileData[offset] & 0x80);
+ for (int i = offset + 1; i < offset + length - 1; i++) {
+ if ((fileData[i] & 0x80) != first) {
+ failMsg = Res.Strings.STR_VFY_DCI_MIXED_DATA;
+ return false;
+ }
+ }
+ if ((fileData[offset + length - 1] & 0x80) == first) {
+ failMsg = Res.Strings.STR_VFY_DCI_NOT_TERMINATED;
+ return false;
+ }
+ return true;
+ default:
+ Debug.Assert(false);
+ return false;
+ }
+ }
+
#endregion // Static analyzers
}
}
diff --git a/SourceGen/DisasmProject.cs b/SourceGen/DisasmProject.cs
index abdd658..f2ee49b 100644
--- a/SourceGen/DisasmProject.cs
+++ b/SourceGen/DisasmProject.cs
@@ -437,6 +437,7 @@ namespace SourceGen {
/// Walks the list of format descriptors, fixing places where the data doesn't match.
///
private void FixAndValidate(ref FileLoadReport report) {
+ // Can't modify a list while we're iterating through it, so gather changes here.
Dictionary changes = new Dictionary();
foreach (KeyValuePair kvp in OperandFormats) {
@@ -505,24 +506,33 @@ namespace SourceGen {
changes[kvp.Key] = newDfd;
Debug.WriteLine("Fix +" + kvp.Key.ToString("x6") + ": " +
dfd + " -> " + newDfd);
+ // possibly interesting, but rarely; very noisy
+ //report.Add(FileLoadItem.Type.Notice,
+ // "Fixed format at +" + kvp.Key.ToString("x6"));
}
}
- // apply changes to main list
- foreach (KeyValuePair kvp in changes) {
- OperandFormats[kvp.Key] = kvp.Value;
- //report.Add(FileLoadItem.Type.Notice,
- // "Fixed format at +" + kvp.Key.ToString("x6"));
+ // Run through the list again, this time looking for badly-formed strings. We're
+ // only checking structure, not character encoding, because you're allowed to have
+ // non-printable characters in strings.
+ foreach (KeyValuePair kvp in OperandFormats) {
+ FormatDescriptor dfd = kvp.Value;
+ if (dfd.IsString && !DataAnalysis.VerifyStringData(FileData, kvp.Key, dfd.Length,
+ dfd.FormatType, out string failMsg)) {
+ report.Add(FileLoadItem.Type.Warning,
+ "+" + kvp.Key.ToString("x6") + ": " + failMsg);
+ changes[kvp.Key] = null;
+ }
}
- // TODO: validate strings
- // - null-terminated strings must not have 0x00 bytes, except for the last byte,
- // which must be 0x00
- // - the length stored in L8/L16 strings much match the format descriptor length
- // - DCI strings must have the appropriate pattern for the high bit
- //
- // Note it is not required that string data match the encoding, since you're allowed
- // to have random gunk mixed in. It just can't violate the above rules.
+ // Apply changes to main list.
+ foreach (KeyValuePair kvp in changes) {
+ if (kvp.Value == null) {
+ OperandFormats.Remove(kvp.Key);
+ } else {
+ OperandFormats[kvp.Key] = kvp.Value;
+ }
+ }
}
///
@@ -821,20 +831,28 @@ namespace SourceGen {
/// Applies user-defined format descriptors to the Anattribs array. This specifies the
/// format for instruction operands, and identifies data items.
///
+ ///
+ /// In an ideal world, this would be a trivial function. In practice it's possible for
+ /// all sorts of weird edge cases to arise, e.g. if you hint something as data, apply
+ /// formats, and then hint it as code, many strange things are possible. We don't want
+ /// to delete user data if it seems out of place, but we do want to ignore anything
+ /// that's going to confuse the source generator later on.
+ ///
+ /// Problem reports are written to a log (which is shown by the Analyzer Output
+ /// window) and the Problems list. Once the latter is better established we can
+ /// stop sending them to the log.
+ ///
/// Log for debug messages.
private void ApplyFormatDescriptors(DebugLog genLog) {
genLog.LogI("Applying format descriptors");
+ // TODO(someday): move error format strings to string dictionary
+
foreach (KeyValuePair kvp in OperandFormats) {
int offset = kvp.Key;
- // If you hint as data, apply formats, and then hint as code, all sorts
- // of strange things can happen. We want to ignore anything that doesn't
- // appear to be valid. While we're at it, we do some internal consistency
- // checks in the name of catching bugs as soon as possible.
-
// Check offset.
- if (offset < 0 || offset >= mAnattribs.Length) {
+ if (offset < 0 || offset >= mFileData.Length) {
string msg = "invalid offset (desc=" + kvp.Value + ")";
genLog.LogE("+" + offset.ToString("x6") + ": " + msg);
Problems.Add(new ProblemList.ProblemEntry(
@@ -844,13 +862,13 @@ namespace SourceGen {
msg,
ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored));
Debug.Assert(false);
- continue; // ignore this one
+ continue;
}
// Make sure it doesn't run off the end
- if (offset + kvp.Value.Length > mAnattribs.Length) {
+ if (offset + kvp.Value.Length > mFileData.Length) {
string msg = "invalid offset+len: len=" + kvp.Value.Length +
- " file=" + mAnattribs.Length;
+ " file=" + mFileData.Length;
genLog.LogE("+" + offset.ToString("x6") + ": " + msg);
Problems.Add(new ProblemList.ProblemEntry(
ProblemList.ProblemEntry.SeverityLevel.Error,
@@ -859,7 +877,19 @@ namespace SourceGen {
msg,
ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored));
Debug.Assert(false);
- continue; // ignore this one
+ continue;
+ }
+
+ if (!AddrMap.IsContiguous(offset, kvp.Value.Length)) {
+ string msg = "descriptor straddles address change; len=" + kvp.Value.Length;
+ genLog.LogE("+" + offset.ToString("x6") + ": " + msg);
+ Problems.Add(new ProblemList.ProblemEntry(
+ ProblemList.ProblemEntry.SeverityLevel.Error,
+ offset,
+ ProblemList.ProblemEntry.ProblemType.InvalidOffsetOrLength,
+ msg,
+ ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored));
+ continue;
}
if (mAnattribs[offset].IsInstructionStart) {
@@ -876,7 +906,7 @@ namespace SourceGen {
ProblemList.ProblemEntry.ProblemType.InvalidOffsetOrLength,
msg,
ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored));
- continue; // ignore this one
+ continue;
}
if (kvp.Value.Length == 1) {
// No operand to format!
@@ -888,7 +918,7 @@ namespace SourceGen {
ProblemList.ProblemEntry.ProblemType.InvalidDescriptor,
msg,
ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored));
- continue; // ignore this one
+ continue;
}
if (!kvp.Value.IsValidForInstruction) {
string msg = "descriptor not valid for instruction: " + kvp.Value;
@@ -899,7 +929,7 @@ namespace SourceGen {
ProblemList.ProblemEntry.ProblemType.InvalidDescriptor,
msg,
ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored));
- continue; // ignore this one
+ continue;
}
} else if (mAnattribs[offset].IsInstruction) {
// Mid-instruction format.
@@ -911,7 +941,7 @@ namespace SourceGen {
ProblemList.ProblemEntry.ProblemType.InvalidDescriptor,
msg,
ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored));
- continue; // ignore this one
+ continue;
} else {
// Data or inline data. The data analyzer hasn't run yet. We want to
// confirm that the descriptor doesn't overlap with code.
@@ -946,6 +976,7 @@ namespace SourceGen {
}
}
+ // All tests passed. Apply the descriptor.
mAnattribs[offset].DataDescriptor = kvp.Value;
}
}
diff --git a/SourceGen/Res/Strings.xaml b/SourceGen/Res/Strings.xaml
index ce1ac4a..3ff9b4a 100644
--- a/SourceGen/Res/Strings.xaml
+++ b/SourceGen/Res/Strings.xaml
@@ -135,6 +135,13 @@ limitations under the License.
{1} CPU @ {2} MHz
Show
Ready
+ DCI string has mixed data
+ DCI string not terminated
+ DCI string is too short
+ length of string doesn't match length byte
+ length of string doesn't match length word
+ null-terminated string doesn't end with null byte
+ found null byte in the middle of null-terminated string
Symbol Import
Imported {0} global symbols.
No global+export symbols were found.
diff --git a/SourceGen/Res/Strings.xaml.cs b/SourceGen/Res/Strings.xaml.cs
index 9fd40c4..450aab2 100644
--- a/SourceGen/Res/Strings.xaml.cs
+++ b/SourceGen/Res/Strings.xaml.cs
@@ -251,6 +251,20 @@ namespace SourceGen.Res {
(string)Application.Current.FindResource("str_ShowCol");
public static string STATUS_READY =
(string)Application.Current.FindResource("str_StatusReady");
+ public static string STR_VFY_DCI_MIXED_DATA =
+ (string)Application.Current.FindResource("str_StrVfyDciMixedData");
+ public static string STR_VFY_DCI_NOT_TERMINATED =
+ (string)Application.Current.FindResource("str_StrVfyDciNotTerminated");
+ public static string STR_VFY_DCI_SHORT =
+ (string)Application.Current.FindResource("str_StrVfyDciShort");
+ public static string STR_VFY_L1_LENGTH_MISMATCH =
+ (string)Application.Current.FindResource("str_StrVfyL1LengthMismatch");
+ public static string STR_VFY_L2_LENGTH_MISMATCH =
+ (string)Application.Current.FindResource("str_StrVfyL2LengthMismatch");
+ public static string STR_VFY_MISSING_NULL_TERM =
+ (string)Application.Current.FindResource("str_StrVfyMissingNullTerm");
+ public static string STR_VFY_NULL_INSIDE_NULL_TERM =
+ (string)Application.Current.FindResource("str_StrVfyNullInsideNullTerm");
public static string SYMBOL_IMPORT_CAPTION =
(string)Application.Current.FindResource("str_SymbolImportCaption");
public static string SYMBOL_IMPORT_GOOD_FMT =
diff --git a/SourceGen/RuntimeData/Apple/F8-ROM.sym65 b/SourceGen/RuntimeData/Apple/F8-ROM.sym65
index cd31c37..f01843e 100644
--- a/SourceGen/RuntimeData/Apple/F8-ROM.sym65
+++ b/SourceGen/RuntimeData/Apple/F8-ROM.sym65
@@ -60,7 +60,7 @@ MON_PRNTYX @ $F940 ;print Y-reg/X-reg as 4 hex digits
MON_PRNTAX @ $F941 ;print Acc/X-reg as 4 hex digits
MON_PRNTX @ $F944 ;print X-reg as 2 hex digits
MON_PRBLNK @ $F948 ;print 3 spaces
-MON_PRBL2 @ $F94A
+MON_PRBL2 @ $F94A ;print multiple spaces, count in X-reg
MON_PCADJ @ $F953 ;monitor/mini-asm PC adjust
MON_TEXT2COPY @ $F962
MON_OLDIRQ @ $FA40 ;autostart ROM IRQ handler