1
0
mirror of https://github.com/fadden/6502bench.git synced 2024-11-29 10:50:28 +00:00

Check formatted string structure at load time

If we have a bug, or somebody edits the project file manually, we
can end up with a very wrong string, such as a null-terminated
string that isn't, or a DCI string that has a mix of high and low
ASCII from start to finish.  We now check all incoming strings for
validity, and discard any that fail the test.  The verification
code is shared with the extension script inline data formatter.

Also, added a comment to an F8-ROM symbol I stumbled over.
This commit is contained in:
Andy McFadden 2019-10-06 17:07:07 -07:00
parent c4fe759efc
commit 8c87ce3004
6 changed files with 154 additions and 90 deletions

View File

@ -1086,7 +1086,9 @@ namespace SourceGen {
}
if (isStringType) {
if (!VerifyStringData(offset, length, fmt)) {
if (!DataAnalysis.VerifyStringData(mFileData, offset, length, fmt,
out string failMsg)) {
LogW(offset, failMsg);
return false;
}
} else if (type == DataType.Fill) {
@ -1111,67 +1113,6 @@ namespace SourceGen {
return true;
}
/// <summary>
/// Confirms that a region of the file data has the framing required by a string
/// format.  Only the structure is examined (terminator, length prefix, high-bit
/// pattern); the character encoding of the contents is not checked.  A warning is
/// logged against the start offset when the data fails verification.
/// </summary>
/// <returns>True if the data is structurally valid for the type.</returns>
private bool VerifyStringData(int offset, int length, FormatDescriptor.Type type) {
    switch (type) {
        case FormatDescriptor.Type.StringGeneric:
        case FormatDescriptor.Type.StringReverse:
            // These formats have no structure to verify.
            return true;

        case FormatDescriptor.Type.StringNullTerm: {
            // The only null byte allowed is the one in the final position.
            int remaining = length;
            int pos = offset;
            while (remaining != 0) {
                remaining--;
                if (mFileData[pos++] != 0x00) {
                    continue;
                }
                if (remaining == 0) {
                    return true;
                }
                LogW(offset, "found null in middle of null-term string");
                return false;
            }
            LogW(offset, "no null at end of null-term string");
            return false;
        }

        case FormatDescriptor.Type.StringL8: {
            // The leading length byte must equal the number of bytes that follow it.
            if (mFileData[offset] == length - 1) {
                return true;
            }
            LogW(offset, "L1 string with mismatched length");
            return false;
        }

        case FormatDescriptor.Type.StringL16: {
            // The leading length word must equal the number of bytes that follow it.
            int storedLen = RawData.GetWord(mFileData, offset, 2, false);
            if (storedLen == length - 2) {
                return true;
            }
            LogW(offset, "L2 string with mismatched length");
            return false;
        }

        case FormatDescriptor.Type.StringDci: {
            if (length < 2) {
                LogW(offset, "DCI string is too short");
                return false;
            }
            // Every byte except the last must share the first byte's high bit; the
            // last byte must have the opposite high bit.
            int leadHiBit = mFileData[offset] & 0x80;
            int lastIndex = offset + length - 1;
            for (int idx = offset + 1; idx < lastIndex; idx++) {
                if ((mFileData[idx] & 0x80) != leadHiBit) {
                    LogW(offset, "mixed DCI string");
                    return false;
                }
            }
            if ((mFileData[lastIndex] & 0x80) == leadHiBit) {
                LogW(offset, "DCI string did not end");
                return false;
            }
            return true;
        }

        default:
            // Callers are expected to pass string types only.
            Debug.Assert(false);
            return false;
    }
}
private bool VerifyFillData(int offset, int length) {
byte first = mFileData[offset];

View File

@ -1144,6 +1144,77 @@ namespace SourceGen {
return stringCount;
}
/// <summary>
/// Checks that a block of bytes has the structure required by a string format.
/// Only the framing is examined (terminator, length prefix, high-bit pattern); the
/// character encoding of the contents is not checked.
/// </summary>
/// <param name="fileData">Raw data.</param>
/// <param name="offset">Offset of the first byte of the string.</param>
/// <param name="length">Total length of the string, counting any leading length
///   bytes and terminating null byte.</param>
/// <param name="type">String format the data is expected to have.</param>
/// <param name="failMsg">Receives a description of the problem when verification
///   fails.  Set to the empty string on success, and also when the type is not
///   one of the handled string types.</param>
/// <returns>True if the data is structurally valid for the type.</returns>
public static bool VerifyStringData(byte[] fileData, int offset, int length,
        FormatDescriptor.Type type, out string failMsg) {
    failMsg = string.Empty;
    switch (type) {
        case FormatDescriptor.Type.StringGeneric:
        case FormatDescriptor.Type.StringReverse:
            // These formats have no structure to verify.
            return true;

        case FormatDescriptor.Type.StringNullTerm: {
            // The only null byte allowed is the one in the final position.
            int remaining = length;
            int pos = offset;
            while (remaining != 0) {
                remaining--;
                if (fileData[pos++] != 0x00) {
                    continue;
                }
                if (remaining == 0) {
                    return true;
                }
                failMsg = Res.Strings.STR_VFY_NULL_INSIDE_NULL_TERM;
                return false;
            }
            failMsg = Res.Strings.STR_VFY_MISSING_NULL_TERM;
            return false;
        }

        case FormatDescriptor.Type.StringL8: {
            // The leading length byte must equal the number of bytes that follow it.
            if (fileData[offset] == length - 1) {
                return true;
            }
            failMsg = Res.Strings.STR_VFY_L1_LENGTH_MISMATCH;
            return false;
        }

        case FormatDescriptor.Type.StringL16: {
            // The leading length word must equal the number of bytes that follow it.
            int storedLen = RawData.GetWord(fileData, offset, 2, false);
            if (storedLen == length - 2) {
                return true;
            }
            failMsg = Res.Strings.STR_VFY_L2_LENGTH_MISMATCH;
            return false;
        }

        case FormatDescriptor.Type.StringDci: {
            if (length < 2) {
                failMsg = Res.Strings.STR_VFY_DCI_SHORT;
                return false;
            }
            // Every byte except the last must share the first byte's high bit; the
            // last byte must have the opposite high bit.
            int leadHiBit = fileData[offset] & 0x80;
            int lastIndex = offset + length - 1;
            for (int idx = offset + 1; idx < lastIndex; idx++) {
                if ((fileData[idx] & 0x80) != leadHiBit) {
                    failMsg = Res.Strings.STR_VFY_DCI_MIXED_DATA;
                    return false;
                }
            }
            if ((fileData[lastIndex] & 0x80) == leadHiBit) {
                failMsg = Res.Strings.STR_VFY_DCI_NOT_TERMINATED;
                return false;
            }
            return true;
        }

        default:
            // Callers are expected to pass string types only.
            Debug.Assert(false);
            return false;
    }
}
#endregion // Static analyzers
}
}

View File

@ -437,6 +437,7 @@ namespace SourceGen {
/// Walks the list of format descriptors, fixing places where the data doesn't match.
/// </summary>
private void FixAndValidate(ref FileLoadReport report) {
// Can't modify a list while we're iterating through it, so gather changes here.
Dictionary<int, FormatDescriptor> changes = new Dictionary<int, FormatDescriptor>();
foreach (KeyValuePair<int, FormatDescriptor> kvp in OperandFormats) {
@ -505,24 +506,33 @@ namespace SourceGen {
changes[kvp.Key] = newDfd;
Debug.WriteLine("Fix +" + kvp.Key.ToString("x6") + ": " +
dfd + " -> " + newDfd);
// possibly interesting, but rarely; very noisy
//report.Add(FileLoadItem.Type.Notice,
// "Fixed format at +" + kvp.Key.ToString("x6"));
}
}
// apply changes to main list
foreach (KeyValuePair<int, FormatDescriptor> kvp in changes) {
OperandFormats[kvp.Key] = kvp.Value;
//report.Add(FileLoadItem.Type.Notice,
// "Fixed format at +" + kvp.Key.ToString("x6"));
// Run through the list again, this time looking for badly-formed strings. We're
// only checking structure, not character encoding, because you're allowed to have
// non-printable characters in strings.
foreach (KeyValuePair<int, FormatDescriptor> kvp in OperandFormats) {
FormatDescriptor dfd = kvp.Value;
if (dfd.IsString && !DataAnalysis.VerifyStringData(FileData, kvp.Key, dfd.Length,
dfd.FormatType, out string failMsg)) {
report.Add(FileLoadItem.Type.Warning,
"+" + kvp.Key.ToString("x6") + ": " + failMsg);
changes[kvp.Key] = null;
}
}
// TODO: validate strings
// - null-terminated strings must not have 0x00 bytes, except for the last byte,
// which must be 0x00
// - the length stored in L8/L16 strings must match the format descriptor length
// - DCI strings must have the appropriate pattern for the high bit
//
// Note it is not required that string data match the encoding, since you're allowed
// to have random gunk mixed in. It just can't violate the above rules.
// Apply changes to main list.
foreach (KeyValuePair<int, FormatDescriptor> kvp in changes) {
if (kvp.Value == null) {
OperandFormats.Remove(kvp.Key);
} else {
OperandFormats[kvp.Key] = kvp.Value;
}
}
}
/// <summary>
@ -821,20 +831,28 @@ namespace SourceGen {
/// Applies user-defined format descriptors to the Anattribs array. This specifies the
/// format for instruction operands, and identifies data items.
/// </summary>
/// <remarks>
/// In an ideal world, this would be a trivial function. In practice it's possible for
/// all sorts of weird edge cases to arise, e.g. if you hint something as data, apply
/// formats, and then hint it as code, many strange things are possible. We don't want
/// to delete user data if it seems out of place, but we do want to ignore anything
/// that's going to confuse the source generator later on.
///
/// Problem reports are written to a log (which is shown by the Analyzer Output
/// window) and the Problems list. Once the latter is better established we can
/// stop sending them to the log.
/// </remarks>
/// <param name="genLog">Log for debug messages.</param>
private void ApplyFormatDescriptors(DebugLog genLog) {
genLog.LogI("Applying format descriptors");
// TODO(someday): move error format strings to string dictionary
foreach (KeyValuePair<int, FormatDescriptor> kvp in OperandFormats) {
int offset = kvp.Key;
// If you hint as data, apply formats, and then hint as code, all sorts
// of strange things can happen. We want to ignore anything that doesn't
// appear to be valid. While we're at it, we do some internal consistency
// checks in the name of catching bugs as soon as possible.
// Check offset.
if (offset < 0 || offset >= mAnattribs.Length) {
if (offset < 0 || offset >= mFileData.Length) {
string msg = "invalid offset (desc=" + kvp.Value + ")";
genLog.LogE("+" + offset.ToString("x6") + ": " + msg);
Problems.Add(new ProblemList.ProblemEntry(
@ -844,13 +862,13 @@ namespace SourceGen {
msg,
ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored));
Debug.Assert(false);
continue; // ignore this one
continue;
}
// Make sure it doesn't run off the end
if (offset + kvp.Value.Length > mAnattribs.Length) {
if (offset + kvp.Value.Length > mFileData.Length) {
string msg = "invalid offset+len: len=" + kvp.Value.Length +
" file=" + mAnattribs.Length;
" file=" + mFileData.Length;
genLog.LogE("+" + offset.ToString("x6") + ": " + msg);
Problems.Add(new ProblemList.ProblemEntry(
ProblemList.ProblemEntry.SeverityLevel.Error,
@ -859,7 +877,19 @@ namespace SourceGen {
msg,
ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored));
Debug.Assert(false);
continue; // ignore this one
continue;
}
if (!AddrMap.IsContiguous(offset, kvp.Value.Length)) {
string msg = "descriptor straddles address change; len=" + kvp.Value.Length;
genLog.LogE("+" + offset.ToString("x6") + ": " + msg);
Problems.Add(new ProblemList.ProblemEntry(
ProblemList.ProblemEntry.SeverityLevel.Error,
offset,
ProblemList.ProblemEntry.ProblemType.InvalidOffsetOrLength,
msg,
ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored));
continue;
}
if (mAnattribs[offset].IsInstructionStart) {
@ -876,7 +906,7 @@ namespace SourceGen {
ProblemList.ProblemEntry.ProblemType.InvalidOffsetOrLength,
msg,
ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored));
continue; // ignore this one
continue;
}
if (kvp.Value.Length == 1) {
// No operand to format!
@ -888,7 +918,7 @@ namespace SourceGen {
ProblemList.ProblemEntry.ProblemType.InvalidDescriptor,
msg,
ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored));
continue; // ignore this one
continue;
}
if (!kvp.Value.IsValidForInstruction) {
string msg = "descriptor not valid for instruction: " + kvp.Value;
@ -899,7 +929,7 @@ namespace SourceGen {
ProblemList.ProblemEntry.ProblemType.InvalidDescriptor,
msg,
ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored));
continue; // ignore this one
continue;
}
} else if (mAnattribs[offset].IsInstruction) {
// Mid-instruction format.
@ -911,7 +941,7 @@ namespace SourceGen {
ProblemList.ProblemEntry.ProblemType.InvalidDescriptor,
msg,
ProblemList.ProblemEntry.ProblemResolution.FormatDescriptorIgnored));
continue; // ignore this one
continue;
} else {
// Data or inline data. The data analyzer hasn't run yet. We want to
// confirm that the descriptor doesn't overlap with code.
@ -946,6 +976,7 @@ namespace SourceGen {
}
}
// All tests passed. Apply the descriptor.
mAnattribs[offset].DataDescriptor = kvp.Value;
}
}

View File

@ -135,6 +135,13 @@ limitations under the License.
<system:String x:Key="str_SetupSystemSummaryFmt">{1} CPU @ {2} MHz</system:String>
<system:String x:Key="str_ShowCol">Show</system:String>
<system:String x:Key="str_StatusReady">Ready</system:String>
<system:String x:Key="str_StrVfyDciMixedData">DCI string has mixed data</system:String>
<system:String x:Key="str_StrVfyDciNotTerminated">DCI string not terminated</system:String>
<system:String x:Key="str_StrVfyDciShort">DCI string is too short</system:String>
<system:String x:Key="str_StrVfyL1LengthMismatch">length of string doesn't match length byte</system:String>
<system:String x:Key="str_StrVfyL2LengthMismatch">length of string doesn't match length word</system:String>
<system:String x:Key="str_StrVfyMissingNullTerm">null-terminated string doesn't end with null byte</system:String>
<system:String x:Key="str_StrVfyNullInsideNullTerm">found null byte in the middle of null-terminated string</system:String>
<system:String x:Key="str_SymbolImportCaption">Symbol Import</system:String>
<system:String x:Key="str_SymbolImportGoodFmt">Imported {0} global symbols.</system:String>
<system:String x:Key="str_SymbolImportNone">No global+export symbols were found.</system:String>

View File

@ -251,6 +251,20 @@ namespace SourceGen.Res {
(string)Application.Current.FindResource("str_ShowCol");
public static string STATUS_READY =
(string)Application.Current.FindResource("str_StatusReady");
public static string STR_VFY_DCI_MIXED_DATA =
(string)Application.Current.FindResource("str_StrVfyDciMixedData");
public static string STR_VFY_DCI_NOT_TERMINATED =
(string)Application.Current.FindResource("str_StrVfyDciNotTerminated");
public static string STR_VFY_DCI_SHORT =
(string)Application.Current.FindResource("str_StrVfyDciShort");
public static string STR_VFY_L1_LENGTH_MISMATCH =
(string)Application.Current.FindResource("str_StrVfyL1LengthMismatch");
public static string STR_VFY_L2_LENGTH_MISMATCH =
(string)Application.Current.FindResource("str_StrVfyL2LengthMismatch");
public static string STR_VFY_MISSING_NULL_TERM =
(string)Application.Current.FindResource("str_StrVfyMissingNullTerm");
public static string STR_VFY_NULL_INSIDE_NULL_TERM =
(string)Application.Current.FindResource("str_StrVfyNullInsideNullTerm");
public static string SYMBOL_IMPORT_CAPTION =
(string)Application.Current.FindResource("str_SymbolImportCaption");
public static string SYMBOL_IMPORT_GOOD_FMT =

View File

@ -60,7 +60,7 @@ MON_PRNTYX @ $F940 ;print Y-reg/X-reg as 4 hex digits
MON_PRNTAX @ $F941 ;print Acc/X-reg as 4 hex digits
MON_PRNTX @ $F944 ;print X-reg as 2 hex digits
MON_PRBLNK @ $F948 ;print 3 spaces
MON_PRBL2 @ $F94A
MON_PRBL2 @ $F94A ;print multiple spaces, count in X-reg
MON_PCADJ @ $F953 ;monitor/mini-asm PC adjust
MON_TEXT2COPY @ $F962
MON_OLDIRQ @ $FA40 ;autostart ROM IRQ handler