mirror of
https://github.com/fadden/6502bench.git
synced 2024-10-31 19:04:44 +00:00
47b1363738
In the cross-reference table we now indicate whether the reference source is doing a read, write, read-modify-write, branch, subroutine call, is just referencing the address, or is part of the data.
1070 lines
49 KiB
C#
1070 lines
49 KiB
C#
/*
|
|
* Copyright 2018 faddenSoft
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Diagnostics;
|
|
|
|
using Asm65;
|
|
using CommonUtil;
|
|
using PluginCommon;
|
|
using SourceGen.Sandbox;
|
|
|
|
namespace SourceGen {
|
|
/// <summary>
|
|
/// Instruction analyzer.
|
|
///
|
|
/// All data held in this object is transient, and will be discarded when analysis
|
|
/// completes. All user-defined values should be held elsewhere and provided as inputs
|
|
/// to the analyzer. Any change that merits re-analysis should be handled by creating a
|
|
/// new instance of this object.
|
|
///
|
|
/// See the comments at the top of UndoableChange for a list of things that can
|
|
/// mandate code re-analysis.
|
|
/// </summary>
|
|
public class CodeAnalysis {
|
|
/// <summary>
/// Type hints are specified by the user.  They identify a region as being code
/// or data.  The code analyzer will stop at data-hinted regions, and will
/// process any code-hinted regions during the dead-code pass.
///
/// The hints are not used directly by the data analyzer, but the effects they
/// have on the Anattrib array are.
/// </summary>
public enum TypeHint : sbyte {
    /// <summary>No hint.  Zero, so it is the value found in newly-allocated arrays.</summary>
    NoHint = 0,

    /// <summary>
    /// Byte is an instruction.  If the code analyzer doesn't reach it naturally,
    /// it will be scanned anyway.
    /// </summary>
    Code = 1,

    /// <summary>Byte is inline data.  Execution continues "through" the byte.</summary>
    InlineData = 2,

    /// <summary>Byte is data.  Execution halts.</summary>
    Data = 3
}
|
|
|
|
/// <summary>
/// Handles callbacks from extension scripts by forwarding each request to the
/// CodeAnalysis instance that created this object.
/// </summary>
private class ScriptSupport : MarshalByRefObject, PluginCommon.IApplication {
    // Analyzer that services the requests.  Set to null by Shutdown(), after which
    // every forwarding call below dereferences null and fails.
    private CodeAnalysis mOuter;

    /// <summary>
    /// Constructor.
    /// </summary>
    /// <param name="ca">Analyzer that will receive the forwarded calls.</param>
    public ScriptSupport(CodeAnalysis ca) {
        mOuter = ca;
    }

    /// <summary>
    /// Call this when analysis is complete, to ensure that over-active scripts
    /// can't keep doing things.  (This is not part of IApplication.)
    /// </summary>
    public void Shutdown() {
        mOuter = null;
    }

    /// <summary>
    /// Writes a plugin-supplied message to the analyzer's debug log at "info"
    /// priority, prefixed with "PLUGIN: ".
    /// </summary>
    /// <param name="msg">Message text.</param>
    public void DebugLog(string msg) => mOuter.mDebugLog.LogI("PLUGIN: " + msg);

    /// <summary>
    /// Forwards an operand format request to the analyzer.
    /// </summary>
    /// <returns>Whatever the analyzer's SetOperandFormat returns.</returns>
    public bool SetOperandFormat(int offset, DataSubType subType, string label) =>
        mOuter.SetOperandFormat(offset, subType, label);

    /// <summary>
    /// Forwards an inline data format request to the analyzer.
    /// </summary>
    /// <returns>Whatever the analyzer's SetInlineDataFormat returns.</returns>
    public bool SetInlineDataFormat(int offset, int length, DataType type,
            DataSubType subType, string label) =>
        mOuter.SetInlineDataFormat(offset, length, type, subType, label);
}
|
|
|
|
/// <summary>
/// Extension script manager.  May be null, in which case no scripts are used.
/// </summary>
private ScriptManager mScriptManager;

/// <summary>
/// Local object that implements the IApplication interface on behalf of plugins.
/// </summary>
private ScriptSupport mScriptSupport;

/// <summary>
/// List of interesting plugins.  If we have plugins that don't do code inlining we
/// can ignore them.  (This is an array rather than a List&lt;IPlugin&gt; as a
/// micro-optimization; see https://stackoverflow.com/a/454923/294248 .)
/// </summary>
private IPlugin[] mScriptArray;

/// <summary>
/// CPU definition to use when analyzing data.
/// </summary>
private CpuDef mCpuDef;

/// <summary>
/// Map of file offsets to addresses.
/// </summary>
private AddressMap mAddrMap;

/// <summary>
/// Reference to the 65xx file data.
/// </summary>
private byte[] mFileData;

/// <summary>
/// Analyzer attributes, one per byte in the input file.
/// </summary>
private Anattrib[] mAnattribs;

/// <summary>
/// Reference to the type hint array, one hint per byte.
/// </summary>
private TypeHint[] mTypeHints;

/// <summary>
/// Reference to the status flag override array, one entry per byte.
/// </summary>
private StatusFlags[] mStatusFlagOverrides;

/// <summary>
/// Initial status flags to use at entry points.
/// </summary>
private StatusFlags mEntryFlags;

/// <summary>
/// Debug trace log.  Starts out as a silent log; the constructor replaces it with
/// the caller-supplied object.
/// </summary>
private DebugLog mDebugLog = new DebugLog(DebugLog.Priority.Silent);
|
|
|
|
|
|
/// <summary>
/// Constructor.  Stores references to the inputs; nothing is copied or analyzed here.
/// </summary>
/// <param name="data">65xx code stream.</param>
/// <param name="cpuDef">CPU definition to use when interpreting code.</param>
/// <param name="anattribs">Anattrib array.  Expected to be newly allocated, all
///   entries set to default values.</param>
/// <param name="addrMap">Map of offsets to addresses.</param>
/// <param name="hints">Type hints, one per byte.</param>
/// <param name="statusFlagOverrides">Status flag overrides for instruction-start
///   bytes.</param>
/// <param name="entryFlags">Status flags to use at code entry points.</param>
/// <param name="scriptMan">Extension script manager.</param>
/// <param name="debugLog">Object that receives debug log messages.</param>
public CodeAnalysis(byte[] data, CpuDef cpuDef, Anattrib[] anattribs,
        AddressMap addrMap, TypeHint[] hints, StatusFlags[] statusFlagOverrides,
        StatusFlags entryFlags, ScriptManager scriptMan, DebugLog debugLog) {
    // Diagnostics and script plumbing.
    mDebugLog = debugLog;
    mScriptManager = scriptMan;
    mScriptSupport = new ScriptSupport(this);

    // What to analyze, and how to interpret it.
    mFileData = data;
    mCpuDef = cpuDef;
    mAddrMap = addrMap;

    // Per-byte inputs and outputs.
    mAnattribs = anattribs;
    mTypeHints = hints;
    mStatusFlagOverrides = statusFlagOverrides;
    mEntryFlags = entryFlags;
}
|
|
|
|
// Internal log functions.  Each one prefixes the message with the file offset,
// formatted as '+' followed by six lower-case hex digits.
//
// If we're concerned about performance overhead due to call-site string
// concatenation, we can #ifdef these to nothing in release builds, which should
// allow the compiler to elide the concat.  Verbose logging is currently disabled
// this way.
#if false
private void LogV(int offset, string msg) {
    if (!mDebugLog.IsLoggable(DebugLog.Priority.Verbose)) {
        return;
    }
    mDebugLog.LogV($"+{offset:x6} {msg}");
}
#else
private void LogV(int offset, string msg) { }
#endif
|
|
#if true
// Debug-priority message, tagged with the file offset.
private void LogD(int offset, string msg) {
    if (!mDebugLog.IsLoggable(DebugLog.Priority.Debug)) {
        return;
    }
    mDebugLog.LogD($"+{offset:x6} {msg}");
}

// Info-priority message, tagged with the file offset.
private void LogI(int offset, string msg) {
    if (!mDebugLog.IsLoggable(DebugLog.Priority.Info)) {
        return;
    }
    mDebugLog.LogI($"+{offset:x6} {msg}");
}

// Warning-priority message, tagged with the file offset.
private void LogW(int offset, string msg) {
    if (!mDebugLog.IsLoggable(DebugLog.Priority.Warning)) {
        return;
    }
    mDebugLog.LogW($"+{offset:x6} {msg}");
}

// Error-priority message, tagged with the file offset.
private void LogE(int offset, string msg) {
    if (!mDebugLog.IsLoggable(DebugLog.Priority.Error)) {
        return;
    }
    mDebugLog.LogE($"+{offset:x6} {msg}");
}
#else
private void LogD(int offset, string msg) { }
private void LogI(int offset, string msg) { }
private void LogW(int offset, string msg) { }
private void LogE(int offset, string msg) { }
#endif
|
|
|
|
/// <summary>
/// Analyzes a blob of code and data, annotating all code areas.
///
/// Also identifies data embedded in code, e.g. parameter blocks following a JSR,
/// with the help of extension scripts.
///
/// Failing here can leave us in a strange state, so prefer to work around unexpected
/// inputs rather than bailing entirely.
/// </summary>
public void Analyze() {
    // Offsets that AnalyzeSegment wants (re-)scanned.  Used as a stack: the most
    // recently added offset is processed first.
    List<int> pendingOffsets = new List<int>();

    mDebugLog.LogI("Analyzing code: " + mFileData.Length + " bytes, CPU=" + mCpuDef.Name);

    PrepareScripts();

    SetAddresses();

    // Set the "is data" and "is inline data" flags on anything that the user has
    // flagged as being such.  This tells us to stop processing or skip over bytes
    // as we work.  We don't need to flag code hints explicitly for analysis, but
    // we want to be able to display the flags in the info window.
    //
    // The data recognizers may spot additional inline data offsets as we work.  This
    // can cause a race if it mis-identifies code that is also a branch target;
    // whichever marks the code first will win.
    UnpackTypeHints();

    // Each unvisited code hint is an entry point.  We only set the "visited" flag
    // on the instruction start, so if the user puts a code hint in the middle of an
    // instruction, we will find it and treat it as an entry point.  (This is useful
    // for embedded instructions that are branched to by code we aren't able to
    // detect.)
    for (int entryOffset = FindFirstUnvisitedInstruction(0); entryOffset >= 0;
            entryOffset = FindFirstUnvisitedInstruction(entryOffset)) {
        mAnattribs[entryOffset].IsEntryPoint = true;
        mAnattribs[entryOffset].StatusFlags = mEntryFlags;
        mAnattribs[entryOffset].ApplyStatusFlags(mStatusFlagOverrides[entryOffset]);

        // Scan from the entry point, then keep scanning until the segment
        // analyzer stops handing us additional offsets.
        int offset = entryOffset;
        bool moreWork;
        do {
            bool visited = mAnattribs[offset].IsVisited;
            bool embedded = mAnattribs[offset].IsInstruction && !visited;
            LogI(offset, "Scan chunk (vis=" + visited +
                " chg=" + mAnattribs[offset].IsChanged +
                (embedded ? " embedded " : "") + ")");

            AnalyzeSegment(offset, pendingOffsets);

            // Did anything new get added?  If so, pop one off the end.
            moreWork = pendingOffsets.Count != 0;
            if (moreWork) {
                int top = pendingOffsets.Count - 1;
                offset = pendingOffsets[top];
                pendingOffsets.RemoveAt(top);
            }
        } while (moreWork);
    }

    // Scripts get no further access to this object.
    mScriptSupport.Shutdown();

    MarkUnexecutedEmbeddedCode();
}
|
|
|
|
/// <summary>
/// Builds the array of extension scripts to consult during analysis, and gives the
/// script manager the callback object the scripts will use.
/// </summary>
private void PrepareScripts() {
    if (mScriptManager != null) {
        // Include all scripts, then prep them.
        mScriptArray = mScriptManager.GetAllInstances().ToArray();
        mScriptManager.PrepareScripts(mScriptSupport);
    } else {
        // No manager.  Currently happens for regression tests with no external
        // files.  Use an empty array so callers needn't null-check.
        mScriptArray = new IPlugin[0];
    }
}
|
|
|
|
/// <summary>
/// Sets the address for every byte in the input, using the address map.  Within
/// each map entry the address increases by one per byte.
/// </summary>
private void SetAddresses() {
    // The AddressMap will have at least one entry, will start at offset 0, and
    // will exactly span the file.
    foreach (AddressMap.AddressMapEntry region in mAddrMap) {
        int endOffset = region.Offset + region.Length;
        int addr = region.Addr;
        for (int offset = region.Offset; offset < endOffset; offset++, addr++) {
            mAnattribs[offset].Address = addr;
        }
    }
}
|
|
|
|
/// <summary>
/// Copies the user's type hints into the "is xxxxx" flags of the Anattrib array, so
/// that the code analyzer can find them easily.
/// </summary>
private void UnpackTypeHints() {
    Debug.Assert(mTypeHints.Length == mAnattribs.Length);

    for (int offset = 0; offset < mTypeHints.Length; offset++) {
        switch (mTypeHints[offset]) {
            case TypeHint.NoHint:
                // Nothing to record.
                break;
            case TypeHint.Code:
                // Set the IsInstruction flag to prevent inline data from being
                // placed here.  A hint on a byte that isn't a valid opcode for
                // this CPU is ignored.
                if (mCpuDef.GetOpDef(mFileData[offset]) == OpDef.OpInvalid) {
                    LogI(offset, "Ignoring code hint on illegal opcode");
                } else {
                    mAnattribs[offset].IsHinted = true;
                    mAnattribs[offset].IsInstruction = true;
                }
                break;
            case TypeHint.InlineData:
                // Tells the code analyzer to walk across these.
                mAnattribs[offset].IsHinted = true;
                mAnattribs[offset].IsInlineData = true;
                break;
            case TypeHint.Data:
                // Tells the code analyzer to stop.  Does not define a data analyzer
                // "uncategorized data" boundary.
                mAnattribs[offset].IsHinted = true;
                mAnattribs[offset].IsData = true;
                break;
            default:
                // Unknown enum value.
                Debug.Assert(false);
                break;
        }
    }
}
|
|
|
|
/// <summary>
/// Finds the first offset that is hinted as code but hasn't yet been visited.
///
/// This might be in the middle of an already-visited instruction.
/// </summary>
/// <param name="start">Offset at which to start the search.</param>
/// <returns>Offset found, or -1 if there are no more.</returns>
private int FindFirstUnvisitedInstruction(int start) {
    for (int i = start; i < mAnattribs.Length; i++) {
        bool unvisitedCodeHint = mAnattribs[i].IsHinted &&
            mTypeHints[i] == TypeHint.Code && !mAnattribs[i].IsVisited;
        if (!unvisitedCodeHint) {
            continue;
        }

        LogD(i, "Unvisited code hint");
        if (!mAnattribs[i].IsData && !mAnattribs[i].IsInlineData) {
            return i;
        }

        // Maybe the user put a code hint on something that was later recognized
        // as inline data?  Shouldn't have been allowed.  Skip it and keep looking.
        LogW(i, "Weird: code hint on data/inline");
    }
    return -1;
}
|
|
|
|
/// <summary>
/// Finds bits of code that are part of embedded instructions but not actually
/// executed, and marks them as single-byte inline data.
/// </summary>
private void MarkUnexecutedEmbeddedCode() {
    // The problem arises when you have a line like 4C 60 EA, with a branch to the
    // middle byte.  The formatter will print "JMP $EA60", then "<label> RTS", and
    // then should print NOP.  The problem is that the NOP wasn't reached by the
    // code analyzer, and so isn't tagged as an instruction start.  It's effectively
    // inline data, so we need to mark it that way.
    //
    // We don't have a quick way to find these, so we just run through the list.
    int offset = 0;
    while (offset < mFileData.Length) {
        if (mAnattribs[offset].IsInstructionStart) {
            // Step to the end of this instruction, or to the first instruction
            // start embedded inside it, whichever comes first.
            int advance = 1;
            while (advance < mAnattribs[offset].Length &&
                    !mAnattribs[offset + advance].IsInstructionStart) {
                advance++;
            }
            offset += advance;
            continue;
        }

        if (mAnattribs[offset].IsInstruction) {
            // Instruction byte that no instruction start claimed: an orphan.
            LogI(offset, "Fixing embedded orphan");
            mAnattribs[offset].IsInstruction = false;
            mAnattribs[offset].IsInlineData = true;
            mAnattribs[offset].DataDescriptor = FormatDescriptor.Create(1,
                FormatDescriptor.Type.NumericLE, FormatDescriptor.SubType.None);
        }
        offset++;
    }
}
|
|
|
|
/// <summary>
/// Analyzes a code segment.  A code segment is a contiguous series of instructions.
/// We halt if we encounter a return, always-taken branch, or the end of the
/// current address map section.
///
/// If we find branches to unvisited code, or previously-visited code that has
/// different status flags, we add that to the list of offsets to scan.
/// </summary>
/// <param name="offset">Starting offset.</param>
/// <param name="scanOffsets">Collection to which additional offsets of interest will
///   be added.</param>
/// <exception cref="Exception">The CPU definition reported a non-positive instruction
///   length for an instruction that continues to the next one.</exception>
private void AnalyzeSegment(int offset, List<int> scanOffsets) {
    while (offset < mFileData.Length) {
        bool wasVisited = mAnattribs[offset].IsVisited;
        if (wasVisited && !mAnattribs[offset].IsChanged) {
            // already visited, not changed; nothing to do
            LogD(offset, "Visited and not changed, bailing");
            return;
        }
        bool firstVisit = !wasVisited;

        // Set "visited" flag, clear "changed".
        mAnattribs[offset].IsVisited = true;
        mAnattribs[offset].IsChanged = false;

        if (mAnattribs[offset].IsData) {
            // This area was declared to be data.  Go no further.  This shouldn't
            // usually happen -- either we should have stopped tracing, or we
            // should have identified the data area as code.
            LogI(offset, "Code ran into data section");
            Debug.Assert(false);
            return;
        }
        if (mAnattribs[offset].IsInlineData) {
            // Generally this won't happen, because we ignore branches into inline data
            // areas, we reject attempts to convert code to inline data, and we can't
            // start in an inline area because the hint is wrong.  However, it's possible
            // for a JSR to a new section to be registered, and then before we get to
            // it an extension script formats the area as inline data.  In that case
            // the inline data "wins", and we stop here.
            LogW(offset, "Code ran into inline data section");
            return;
        }

        // Identify the instruction.  Its length can depend on the status flags.
        OpDef op = mCpuDef.GetOpDef(mFileData[offset]);
        int instrLen = op.GetLength(mAnattribs[offset].StatusFlags);
        LogV(offset, "OP $" + mFileData[offset].ToString("X2") + " len=" + instrLen);

        int instrEnd = offset + instrLen;       // offset of first byte past the instruction
        if (instrEnd > mFileData.Length) {
            // Instruction runs off the end.  It's possible we visited here before with
            // short M/X flags, or some other code jumps to code embedded in our
            // operand.  Whatever the case, we want to clear the instruction flag from
            // the first byte.  We can mark it as data so subsequent passes don't
            // bump into this.
            LogW(offset, "Instruction runs off end of file");
            mAnattribs[offset].IsInstructionStart = false;
            mAnattribs[offset].IsInstruction = false;
            mAnattribs[offset].IsData = true;
            return;
        }
        if (mAnattribs[instrEnd - 1].Address !=
                mAnattribs[offset].Address + instrLen - 1) {
            // Address change happened mid-instruction.  Mark it as data.
            LogW(offset, "Detected address change mid-instruction");
            mAnattribs[offset].IsInstructionStart = false;
            mAnattribs[offset].IsInstruction = false;
            mAnattribs[offset].IsData = true;
            return;
        }

        // Instruction not defined for this CPU.  Stop here; the flags are left alone.
        if (op.AddrMode == OpDef.AddressMode.Unknown) {
            LogW(offset, "Instruction stream encountered invalid opcode ($" +
                mFileData[offset].ToString("x2") + ")");
            return;
        }

        // Flag as start of valid instruction, and mark all bytes as instructions.
        // There's a possible conflict here if the first byte is marked as an
        // instruction, but bytes within the instruction are marked as data.  The
        // easiest thing to do here is steamroll the data flags.
        //
        // (To cause this, hint a 3-byte instruction as data/inline-data, then
        // hint the first byte of the instruction as code.)
        mAnattribs[offset].IsInstructionStart = true;
        mAnattribs[offset].Length = instrLen;
        for (int byteOffset = offset; byteOffset < instrEnd; byteOffset++) {
            if (mAnattribs[byteOffset].IsData) {
                LogW(byteOffset, "Stripping mid-instruction data flag");
                mAnattribs[byteOffset].IsData = false;
            } else if (mAnattribs[byteOffset].IsInlineData) {
                LogW(byteOffset, "Stripping mid-instruction inline-data flag");
                mAnattribs[byteOffset].IsInlineData = false;
            }
            mAnattribs[byteOffset].IsInstruction = true;
        }

        // Compute the effect on the status flags: one set for falling through to
        // the next instruction, one for the target of a taken conditional branch.
        StatusFlags newFlags, condBranchTakenFlags;
        if (op == OpDef.OpPLP_StackPull) {
            // PLP restores flags from the stack.
            newFlags = condBranchTakenFlags = GuessFlagsForPLP(offset);
        } else {
            op.ComputeFlagChanges(mAnattribs[offset].StatusFlags, mFileData, offset,
                out newFlags, out condBranchTakenFlags);
        }

        // Handle stuff that won't be different on a subsequent visit.
        if (firstVisit) {
            // Decode the operand for instructions that reference an address.  If
            // the target address is within the file's address space, record the
            // offset as well.  This doesn't examine immediate operands.
            DecodeOperandAddress(offset, op);
        }

        // Check for branching.
        int branchOffset = -1;
        bool doBranch = false;
        if (op.IsBranchOrSubCall) {
            if (mAnattribs[offset].IsOperandOffsetDirect) {
                branchOffset = mAnattribs[offset].OperandOffset;
            }
            doBranch = (branchOffset >= 0 && branchOffset < mFileData.Length);
            if (!doBranch) {
                // External branch.  Very common for JSR to ROM routines and JMP
                // through an indirect address.  Not usually expected for relative
                // branches.
                if (op.Effect != OpDef.FlowEffect.CallSubroutine) {
                    LogD(offset, "Branch goes external");
                }
                mAnattribs[offset].IsExternalBranch = true;
            }
        }

        // Check continuation to next instruction.
        OpDef.FlowEffect effect = op.Effect;
        bool doContinue = (effect == OpDef.FlowEffect.Cont ||
            effect == OpDef.FlowEffect.CallSubroutine ||
            effect == OpDef.FlowEffect.ConditionalBranch);

        // Some 6502 code works around the lack of a branch-always instruction with
        // a complement pair (e.g. BCC + BCS), so we don't want to continue past a branch
        // always taken.  The converse is also true: don't pursue a branch if it's
        // never taken.  An example from 6502.org:
        //   "... a common sequence on the 6502 family is:
        //      CLEAR_FLAG CLC
        //                 DB  $B0
        //      SET_FLAG   SEC
        //                 ROR FLAG
        //                 RTS
        //    When entering via CLEAR_FLAG, the $B0 becomes a 2-cycle BCS instruction, which
        //    is not taken (since the carry is clear).  Since BCS does not affect any flags,
        //    it serves, in this situation, as a two byte, two cycle NOP and provides a
        //    subtle, but useful way to efficiently skip the SEC instruction."

        // Revise branch/cont for conditional branch instructions.
        if (effect == OpDef.FlowEffect.ConditionalBranch) {
            OpDef.BranchTaken taken =
                OpDef.IsBranchTaken(op, mAnattribs[offset].StatusFlags);
            switch (taken) {
                case OpDef.BranchTaken.Never:
                    doBranch = false;
                    break;
                case OpDef.BranchTaken.Always:
                    doContinue = false;
                    break;
                default:
                    break;
            }
            mAnattribs[offset].BranchTaken = taken;
        }

        // Make sure destination isn't already flagged as data.
        if (doBranch) {
            Debug.Assert(branchOffset >= 0);
            if (mAnattribs[branchOffset].IsData || mAnattribs[branchOffset].IsInlineData) {
                LogW(offset, "Ignoring branch to +" + branchOffset.ToString("x6") +
                    " (data region)");
                doBranch = false;
                branchOffset = -1;
            }
        }

        LogV(offset, "doBranch=" + doBranch + ", doCont=" + doContinue);

        if (doBranch) {
            // Flag the destination offset as a branch target.
            mAnattribs[branchOffset].IsBranchTarget = true;

            // Merge our status flags with theirs.
            StatusFlags targetFlagsBefore = mAnattribs[branchOffset].StatusFlags;
            mAnattribs[branchOffset].MergeStatusFlags(condBranchTakenFlags);
            mAnattribs[branchOffset].ApplyStatusFlags(mStatusFlagOverrides[branchOffset]);

            // If we need to (re-)scan this offset, add it to the list.
            bool needScan;
            string reason;
            if (!mAnattribs[branchOffset].IsVisited) {
                // Not yet visited.  Some flags may have been set by earlier branch.
                needScan = true;
                reason = "(not visited)";
            } else {
                // Visited before.  If the status flags changed, set "changed" and
                // add to scan offsets.
                needScan = (targetFlagsBefore != mAnattribs[branchOffset].StatusFlags);
                if (needScan) {
                    mAnattribs[branchOffset].IsChanged = true;
                }
                reason = "(flags: " + targetFlagsBefore + " -> " +
                    mAnattribs[branchOffset].StatusFlags + ")";
            }
            if (needScan && !scanOffsets.Contains(branchOffset)) {
                LogD(offset, "Adding " + branchOffset.ToString("x4") +
                    " to scan list " + reason);
                scanOffsets.Add(branchOffset);
            }
        }

        mAnattribs[offset].DoesNotContinue = !doContinue;
        if (!doContinue) {
            return;
        }

        // Sanity check to avoid infinite loop.
        if (instrLen <= 0) {
            LogE(offset, "Internal error: instruction length " + instrLen);
            throw new Exception("Instruction length was " + instrLen);
        }

        int nextOffset = instrEnd;
        if (nextOffset >= mFileData.Length) {
            // next instruction is off the end of the file
            LogW(offset, "Execution ran off the end of the file");
            return;
        }

        // On first visit, check for JSR/JSL inline call.
        // Currently ignoring OpDef.OpJSR_AbsIndexXInd
        if (firstVisit && (op == OpDef.OpJSR_Abs || op == OpDef.OpJSR_AbsLong)) {
            CheckForInlineCall(op, offset, out bool noContinue);
            if (noContinue) {
                LogD(offset, "Script declared inline call no-continue");
                mAnattribs[offset].DoesNotContinue = true;
                return;
            }
        }

        // Are we about to walk into inline data?  If so, skip over it to find the
        // next instruction (or next inline data chunk).  Note Anattrib.Length==0
        // unless a format has been applied, so we just walk forward a byte at a time.
        while (nextOffset < mFileData.Length && mAnattribs[nextOffset].IsInlineData) {
            nextOffset++;
        }
        int inlineDataGapLen = nextOffset - instrEnd;

        // Re-check after inline data advance.
        if (nextOffset >= mFileData.Length) {
            // next instruction is off the end of the file
            LogW(offset, "Execution ran off the end of the file");
            return;
        }
        if (mAnattribs[nextOffset].IsData) {
            // Drove into a data section
            LogW(offset, "Execution ran into a data area");
            return;
        }

        // Make sure we don't "continue" across an ORG.
        // NOTE: it's possible to do some crazy things with multiple ORGs that will
        // cause us to misinterpret things, but I don't think that matters.  What's
        // important is that the code analyzer doesn't drive into a data area.
        int expectedAddr = mAnattribs[offset].Address + mAnattribs[offset].Length +
            inlineDataGapLen;
        if (mAnattribs[nextOffset].Address != expectedAddr) {
            LogW(offset, "Execution ran across address change (" +
                expectedAddr.ToString("x4") + " vs. " +
                mAnattribs[nextOffset].Address.ToString("x4") + ")");
            return;
        }

        // Merge the updated status flags into the next instruction.
        StatusFlags nextFlagsBefore = mAnattribs[nextOffset].StatusFlags;
        mAnattribs[nextOffset].MergeStatusFlags(newFlags);
        mAnattribs[nextOffset].ApplyStatusFlags(mStatusFlagOverrides[nextOffset]);

        // If we've already visited the next offset, it hasn't been flagged as
        // changed, and our status flag merge had no effect, then there's nothing
        // to gain by continuing forward.
        if (mAnattribs[nextOffset].IsVisited && !mAnattribs[nextOffset].IsChanged &&
                nextFlagsBefore == mAnattribs[nextOffset].StatusFlags) {
            LogV(offset, "Not re-examining " + nextOffset);
            return;
        }

        offset = nextOffset;
    }
}
|
|
|
|
/// <summary>
/// Attempts to guess what the flags will be after a PLP instruction.
/// </summary>
/// <param name="plpOffset">Offset of PLP instruction.</param>
/// <returns>Best guess at status flags: those recorded for the nearest preceding
///   visited PHP within 128 bytes, or all-indeterminate if none is found.  The E
///   flag is always taken from the PLP instruction itself.</returns>
private StatusFlags GuessFlagsForPLP(int plpOffset) {
    // We're not tracking stack contents or register contents, so this just
    // generally won't work.  However, there's a lot of code that uses PHP to
    // save the current state and PLP to restore it, so if we can find a nearby
    // PHP we can just grab from that.
    //
    // Failing that, we mark all flags as "indeterminate" and let the user sort
    // out what it should be.  It's unlikely to matter except for M/X flags on
    // the 65816.
    //
    // The emulation flag is not part of the status register, even if we do carry
    // it around like one.  The E-flag is always carried over from the previous
    // instruction.

    // Arbitrary 128-byte reach, clamped to the start of the file.
    int lowestOffset = Math.Max(plpOffset - 128, 0);

    StatusFlags guess = StatusFlags.AllIndeterminate;
    for (int scan = plpOffset - 1; scan >= lowestOffset; scan--) {
        // Only the first byte of an instruction we have actually visited counts.
        if (mAnattribs[scan].IsInstructionStart && mAnattribs[scan].IsVisited &&
                mCpuDef.GetOpDef(mFileData[scan]) == OpDef.OpPHP_StackPush) {
            LogI(plpOffset, "Found visited PHP at +" + scan.ToString("x6"));
            guess = mAnattribs[scan].StatusFlags;
            break;
        }
    }

    // Transfer the 'E' flag.
    guess.E = mAnattribs[plpOffset].StatusFlags.E;
    return guess;
}
|
|
|
|
/// <summary>
/// Extracts the address from the operand of an absolute or relative operation.
/// Anything that could be referenced by a label or address equate is appropriate.
/// The goal is to identify data and branch targets, not generate a second copy
/// of the operand.
///
/// The operand's address, and if applicable, the operand's file offset, are
/// stored in the Anattrib array.
///
/// For PC-relative operands (e.g. branches) it's tempting to simply adjust the file
/// offset by the specified amount and convert that to an address.  If the file
/// has multiple ORGs, this can produce incorrect results.  We need to convert the
/// opcode's offset to an address, adjust by the operand, and then find the file
/// offset that corresponds to the target address.
///
/// Doesn't do anything with immediate data.
/// </summary>
/// <param name="offset">Offset of the instruction opcode.</param>
/// <param name="op">Opcode being handled.  (Passed in because the caller has it
///   handy.)</param>
private void DecodeOperandAddress(int offset, OpDef op) {
    int operand = op.GetOperand(mFileData, offset, mAnattribs[offset].StatusFlags);
    int instrAddr = mAnattribs[offset].Address;

    // Add the bank to get a 24-bit address.  We're currently using the program bank
    // (K) rather than the data bank (B), which is correct for absolute and relative
    // branches but wrong for 16-bit data operations.  We currently have no way to
    // know what the value of B is, so we use K because there's some small chance
    // of it being correct.
    // TODO(someday): figure out how to get the correct value for the B reg
    int bank = instrAddr & 0x7fff0000;

    // Extract target address.
    switch (op.AddrMode) {
        // 16-bit operands.  These might refer to a location in the file, or might
        // be external.
        case OpDef.AddressMode.Abs:
        case OpDef.AddressMode.AbsIndexX:
        case OpDef.AddressMode.AbsIndexY:
        case OpDef.AddressMode.AbsIndexXInd:
        case OpDef.AddressMode.AbsInd:
        case OpDef.AddressMode.AbsIndLong:
        case OpDef.AddressMode.StackAbs:
            mAnattribs[offset].OperandAddress = operand | bank;
            break;

        // Direct page (always bank 0) and 24-bit operands (bank already included):
        // use the operand as-is.
        case OpDef.AddressMode.DP:
        case OpDef.AddressMode.DPIndexX:
        case OpDef.AddressMode.DPIndexY:
        case OpDef.AddressMode.DPIndexXInd:
        case OpDef.AddressMode.DPInd:
        case OpDef.AddressMode.DPIndLong:
        case OpDef.AddressMode.DPIndIndexY:
        case OpDef.AddressMode.DPIndIndexYLong:
        case OpDef.AddressMode.StackDPInd:
        case OpDef.AddressMode.AbsIndexXLong:
        case OpDef.AddressMode.AbsLong:
            mAnattribs[offset].OperandAddress = operand;
            break;

        // Relative operands; convert to absolute address.
        case OpDef.AddressMode.PCRel:
            mAnattribs[offset].OperandAddress =
                Asm65.Helper.RelOffset8(instrAddr, (sbyte)operand) | bank;
            break;
        case OpDef.AddressMode.PCRelLong:
        case OpDef.AddressMode.StackPCRelLong:
            mAnattribs[offset].OperandAddress =
                Asm65.Helper.RelOffset16(instrAddr, (short)operand) | bank;
            break;

        default:
            // Immediate, implied, accumulator, stack relative.  We can't do
            // immediate yet because we won't necessarily have a final assessment
            // of the operand width.
            Debug.Assert(mAnattribs[offset].OperandAddress == -1);
            break;
    }

    if (mAnattribs[offset].OperandAddress < 0) {
        // No address, so there should be no offset information either.
        Debug.Assert(mAnattribs[offset].OperandOffset == -1);
        Debug.Assert(!mAnattribs[offset].IsOperandOffsetDirect);
        return;
    }

    int operandOffset = mAddrMap.AddressToOffset(offset,
        mAnattribs[offset].OperandAddress);
    if (operandOffset < 0) {
        // The address map has no file offset for this address.
        return;
    }
    mAnattribs[offset].OperandOffset = operandOffset;

    // Set a flag if this is a direct offset.  This is used when tracing
    // through jump instructions, as we can't necessarily decode an indirect
    // jump.  (There are *some* indirect JMPs we can handle, if the operand
    // is an address in the file data area.)
    bool isDirect;
    switch (op.AddrMode) {
        case OpDef.AddressMode.Abs:
        case OpDef.AddressMode.AbsLong:
        case OpDef.AddressMode.DP:
        case OpDef.AddressMode.PCRel:
        case OpDef.AddressMode.PCRelLong:
        case OpDef.AddressMode.StackPCRelLong:
        case OpDef.AddressMode.StackAbs:
            isDirect = true;
            break;
        default:
            isDirect = false;
            break;
    }
    mAnattribs[offset].IsOperandOffsetDirect = isDirect;
}
|
|
|
|
/// <summary>
/// Gives every extension script a chance to examine a JSR or JSL instruction and
/// decide whether it is actually an inline call.
/// </summary>
/// <param name="op">Instruction being examined.  Only JSR (absolute) and JSL
///   (absolute long) are expected; anything else trips a debug assertion.</param>
/// <param name="offset">File offset of the start of the instruction.</param>
/// <param name="noContinue">Receives true if at least one script declared the call
///   to be no-continue, false otherwise.</param>
private void CheckForInlineCall(OpDef op, int offset, out bool noContinue) {
    noContinue = false;
    foreach (IPlugin script in mScriptArray) {
        bool scriptNoCont;
        if (op == OpDef.OpJSR_Abs) {
            script.CheckJsr(offset, out scriptNoCont);
        } else if (op == OpDef.OpJSR_AbsLong) {
            script.CheckJsl(offset, out scriptNoCont);
        } else {
            // Not a JSR/JSL; nothing for the script to look at.
            Debug.Assert(false);
            scriptNoCont = false;
        }

        // One "no-continue" vote from any script is enough.
        if (scriptNoCont) {
            noContinue = true;
        }
    }
}
|
|
|
|
/// <summary>
/// Handles a set operand format call from an extension script.  Attaches a format
/// descriptor to the instruction that starts at the specified offset.
/// </summary>
/// <param name="offset">File offset of the start of the instruction.</param>
/// <param name="subType">Plugin sub-type to apply to the operand.</param>
/// <param name="label">Symbol label; required when subType is Symbol.</param>
/// <returns>True if the descriptor was set, false if the request was rejected
///   (descriptor already present, not an instruction start, or missing label).</returns>
/// <exception cref="Exception">The offset is outside the file, or the sub-type is not
///   one that ConvertPluginSubType recognizes.</exception>
private bool SetOperandFormat(int offset, DataSubType subType, string label) {
    // An offset equal to the file length is one past the last byte, so it must be
    // rejected here; otherwise the mAnattribs lookup below fails with an index error
    // instead of this descriptive message.
    // NOTE(review): offset 0 is rejected as well, which rules out formatting an
    // instruction at the very start of the file -- confirm that is intended.
    if (offset <= 0 || offset >= mFileData.Length) {
        throw new Exception("SOF: bad args: offset=+" + offset.ToString("x6") +
            " subType=" + subType + " label='" + label + "'; file length is " +
            mFileData.Length);
    }

    // Don't overwrite existing format.
    if (mAnattribs[offset].DataDescriptor != null) {
        LogW(offset, "SOF: already have a descriptor here");
        return false;
    }

    // Must be the start of an instruction.
    if (!mAnattribs[offset].IsInstructionStart) {
        LogW(offset, "SOF: not an instruction start");
        return false;
    }

    if (subType == DataSubType.Symbol && string.IsNullOrEmpty(label)) {
        LogW(offset, "SOF rej: label required for subType=" + subType);
        return false;
    }

    // Throws if the sub-type is unrecognized.  With the converter as currently
    // written, isNumericSub is only false for SubType.None, so the rejection below
    // is a defensive check that doesn't fire today.
    FormatDescriptor.SubType subFmt = ConvertPluginSubType(subType, out bool isNumericSub);
    if (!isNumericSub && subFmt != FormatDescriptor.SubType.None) {
        LogW(offset, "SOF: bad sub-type " + subType);
        return false;
    }

    // The descriptor spans the whole instruction.
    int instrLen = mAnattribs[offset].Length;
    Debug.Assert(instrLen > 0);

    FormatDescriptor fd;
    if (subType == DataSubType.Symbol) {
        fd = FormatDescriptor.Create(instrLen,
            new WeakSymbolRef(label, WeakSymbolRef.Part.Low),
            false);
    } else {
        fd = FormatDescriptor.Create(instrLen, FormatDescriptor.Type.NumericLE, subFmt);
    }
    mAnattribs[offset].DataDescriptor = fd;
    return true;
}
|
|
|
|
/// <summary>
/// Handles a set data format call from an extension script.  On success, a format
/// descriptor is attached at <paramref name="offset"/> and every byte in the range
/// is flagged as inline data.
/// </summary>
/// <param name="offset">File offset of the first byte to format.</param>
/// <param name="length">Number of bytes to format.  Must be positive, and must be
///   1-4 for the numeric types.</param>
/// <param name="type">Plugin data type.</param>
/// <param name="subType">Plugin data sub-type.</param>
/// <param name="label">Symbol label; required when subType is Symbol.</param>
/// <returns>True if the format was applied, false if the request was rejected because
///   the start is already formatted or the range overlaps a conflicting hint or an
///   instruction.</returns>
/// <exception cref="Exception">The range falls outside the file, or the
///   type / sub-type / length / label combination is invalid.</exception>
private bool SetInlineDataFormat(int offset, int length, DataType type,
        DataSubType subType, string label) {
    // The upper bound is written as a subtraction so that a very large length cannot
    // overflow "offset + length" and slip past the check.  Non-positive lengths are
    // rejected too: they would skip the per-byte loops below and leave a descriptor
    // attached to no bytes at all.
    if (offset <= 0 || length <= 0 || length > mFileData.Length - offset) {
        throw new Exception("SIDF: bad args: offset=+" + offset.ToString("x6") +
            " len=" + length + " type=" + type + " subType=" + subType +
            " label='" + label + "'; file length is " + mFileData.Length);
    }

    // Already formatted?  We only check the initial offset -- overlapping format
    // descriptors aren't strictly illegal.
    if (mAnattribs[offset].DataDescriptor != null) {
        LogW(offset, "SIDF: already have a descriptor here");
        return false;
    }

    // Don't allow formatting of any bytes that are identified as instructions or
    // were hinted by the user as something other than inline data.  If the code
    // analyzer comes crashing through later they'll just stomp on what we've done.
    for (int i = offset; i < offset + length; i++) {
        if (mTypeHints[i] != TypeHint.NoHint && mTypeHints[i] != TypeHint.InlineData) {
            LogW(offset, "SIDF rej: already a hint at " + i.ToString("x6") +
                " (" + mTypeHints[i] + ")");
            return false;
        }
        // Test the byte being visited, not just the first one, so an instruction
        // anywhere in the range causes a rejection.
        if (mAnattribs[i].IsInstruction) {
            LogW(offset, "SIDF rej: not for use with instructions");
            return false;
        }
    }

    // Convert type to FormatDescriptor type, and do some validity checks.  Both
    // converters throw on values they don't recognize.
    FormatDescriptor.Type fmt = ConvertPluginType(type);
    FormatDescriptor.SubType subFmt = ConvertPluginSubType(subType, out bool isNumericSub);

    if (type == DataType.Dense && subType != DataSubType.None) {
        throw new Exception("SIDF rej: dense data must use subType=None");
    }

    if (isNumericSub && fmt != FormatDescriptor.Type.NumericLE &&
            fmt != FormatDescriptor.Type.NumericBE) {
        throw new Exception("SIDF rej: bad type/subType combo: type=" +
            type + " subType= " + subType);
    }
    if ((type == DataType.NumericLE || type == DataType.NumericBE) &&
            (length < 1 || length > 4)) {
        throw new Exception("SIDF rej: bad length for numeric item (" +
            length + ")");
    }
    if (subType == DataSubType.Symbol && string.IsNullOrEmpty(label)) {
        throw new Exception("SIDF rej: label required for subType=" + subType);
    }

    // Looks good, create a descriptor, and mark all bytes as inline data.
    FormatDescriptor fd;
    if (subType == DataSubType.Symbol) {
        fd = FormatDescriptor.Create(length,
            new WeakSymbolRef(label, WeakSymbolRef.Part.Low),
            type == DataType.NumericBE);
    } else {
        fd = FormatDescriptor.Create(length, fmt, subFmt);
    }
    mAnattribs[offset].DataDescriptor = fd;
    for (int i = offset; i < offset + length; i++) {
        mAnattribs[i].IsInlineData = true;
    }
    return true;
}
|
|
|
|
/// <summary>
/// Maps a plugin data type onto the matching FormatDescriptor type.
/// </summary>
/// <param name="pluginType">Type value supplied by the extension script.</param>
/// <returns>The equivalent FormatDescriptor.Type for NumericLE, NumericBE, and
///   Dense.</returns>
/// <exception cref="Exception">The type is String, Fill, or any other value that
///   has no mapping here.</exception>
private FormatDescriptor.Type ConvertPluginType(DataType pluginType) {
    if (pluginType == DataType.NumericLE) {
        return FormatDescriptor.Type.NumericLE;
    }
    if (pluginType == DataType.NumericBE) {
        return FormatDescriptor.Type.NumericBE;
    }
    if (pluginType == DataType.Dense) {
        return FormatDescriptor.Type.Dense;
    }

    // String, Fill, and anything unrecognized: not appropriate for operands,
    // or inline data (?)
    throw new Exception("Instr format rej: unexpected format type " + pluginType);
}
|
|
|
|
/// <summary>
/// Maps a plugin data sub-type onto the matching FormatDescriptor sub-type.
/// </summary>
/// <param name="pluginSubType">Sub-type value supplied by the extension script.</param>
/// <param name="isNumericSub">Receives false when the sub-type is None, true for
///   every other value.</param>
/// <returns>The equivalent FormatDescriptor.SubType.</returns>
/// <exception cref="Exception">The sub-type has no mapping here.</exception>
private FormatDescriptor.SubType ConvertPluginSubType(DataSubType pluginSubType,
        out bool isNumericSub) {
    // Everything except None is reported as a numeric sub-type.  Assigned before
    // the switch so the value is in place even when the default case throws.
    isNumericSub = (pluginSubType != DataSubType.None);

    FormatDescriptor.SubType converted;
    switch (pluginSubType) {
        case DataSubType.None:
            converted = FormatDescriptor.SubType.None;
            break;
        case DataSubType.Hex:
            converted = FormatDescriptor.SubType.Hex;
            break;
        case DataSubType.Decimal:
            converted = FormatDescriptor.SubType.Decimal;
            break;
        case DataSubType.Binary:
            converted = FormatDescriptor.SubType.Binary;
            break;
        case DataSubType.Ascii:
            converted = FormatDescriptor.SubType.Ascii;
            break;
        case DataSubType.Address:
            converted = FormatDescriptor.SubType.Address;
            break;
        case DataSubType.Symbol:
            converted = FormatDescriptor.SubType.Symbol;
            break;
        default:
            throw new Exception("Instr format rej: unexpected sub type " + pluginSubType);
    }
    return converted;
}
|
|
}
|
|
} |