From f4fe3af050ba6763879ddcb59b9f988b2a857d3c Mon Sep 17 00:00:00 2001 From: Andy McFadden Date: Mon, 6 Jul 2020 17:10:04 -0700 Subject: [PATCH] Fix application of reloc info in data areas The test wasn't correctly excluding instructions, so it was possible to create a situation where a two-byte data item had an instruction starting in the second byte. We also weren't checking the length of the instruction to ensure that it was wider than the reloc data. This could get weird for an immediate constant when the M/X flags are wrong. When in doubt, don't overwrite. --- Asm65/StatusFlags.cs | 5 ++++- SourceGen/Anattrib.cs | 5 +++++ SourceGen/DataAnalysis.cs | 15 +++++++++++---- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/Asm65/StatusFlags.cs b/Asm65/StatusFlags.cs index 6aedfa6..a9c9c2f 100644 --- a/Asm65/StatusFlags.cs +++ b/Asm65/StatusFlags.cs @@ -235,7 +235,10 @@ namespace Asm65 { /// (8-bit) accumulator. /// /// - /// This is where we decide how to treat ambiguous status flags. + /// This is (mostly) where we decide how to treat ambiguous status flags. We favor + /// short flags because, when we get it wrong, it tends to be easier to spot (e.g. + /// LDA #$00xx becomes LDA+BRK). Mistakenly guessing "long" also tends to result in + /// instructions with other instructions embedded in them, which can be confusing. /// public bool IsShortM { get { diff --git a/SourceGen/Anattrib.cs b/SourceGen/Anattrib.cs index 4471836..799b0a9 100644 --- a/SourceGen/Anattrib.cs +++ b/SourceGen/Anattrib.cs @@ -225,6 +225,11 @@ namespace SourceGen { return IsInlineData && DataDescriptor != null; } } + public bool IsUntyped { + get { + return !IsInstruction && !IsData && !IsInlineData; + } + } /// /// Get the target memory address for this byte. diff --git a/SourceGen/DataAnalysis.cs b/SourceGen/DataAnalysis.cs index 9f8bb81..0244079 100644 --- a/SourceGen/DataAnalysis.cs +++ b/SourceGen/DataAnalysis.cs @@ -165,11 +165,16 @@ namespace SourceGen { // Check for a relocation. It'll be at offset+1 because it's on the operand, // not the opcode byte. (Make sure to check the length, or an RTS followed // by relocated data will freak out.) + // + // We don't check for embedded instructions here. If that did somehow happen, + // it's probably intentional, so we should do the replacement. + // // TODO(someday): this won't get the second byte of an MVN/MVP, which is fine // since we don't currently support two formats on one instruction. if (mAnalysisParams.UseRelocData) { if (attr.Length > 1 && mProject.RelocList.TryGetValue(offset + 1, - out DisasmProject.RelocData reloc)) { + out DisasmProject.RelocData reloc) && + attr.Length > reloc.Width) { // The relocation address differs from what the analyzer came up // with. This may be because of incorrect assumptions about the // bank (assuming B==K) or because the partial address refers to @@ -245,15 +250,15 @@ namespace SourceGen { // There shouldn't be any data items inside other data items, so we // can just skip forward. offset += mAnattribs[offset].DataDescriptor.Length - 1; - } else if (mAnalysisParams.UseRelocData && - !attr.IsInstruction && !attr.IsData && !attr.IsInlineData && + } else if (mAnalysisParams.UseRelocData && attr.IsUntyped && mProject.RelocList.TryGetValue(offset, out DisasmProject.RelocData reloc)) { // Byte is unformatted, but there's relocation data here. If the full // range of bytes is unformatted, create a symbolic reference. bool allClear = true; for (int i = 1; i < reloc.Width; i++) { - if (mAnattribs[offset + i].DataDescriptor != null) { + if (!mAnattribs[offset + i].IsUntyped || + mAnattribs[offset + i].DataDescriptor != null) { allClear = false; break; } @@ -382,6 +387,8 @@ namespace SourceGen { // and offsets identified by users or scripts have been categorized.) // // ?? Can we use GetBaseOperandOffset(), which searches for IsStart? + // + // TODO(performance): we spend a significant amount of time in this loop. int scanOffset = targetOffset; while (--scanOffset >= 0) { FormatDescriptor dfd = mAnattribs[scanOffset].DataDescriptor;