/*
 * Copyright 2019 faddenSoft
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
using System;
using System.Collections.Generic;
using System.Diagnostics;

using Asm65;
using CommonUtil;
using PluginCommon;
using SourceGenWPF.Sandbox;

namespace SourceGenWPF {
    /// <summary>
    /// Instruction analyzer.
    ///
    /// All data held in this object is transient, and will be discarded when analysis
    /// completes. All user-defined values should be held elsewhere and provided as inputs
    /// to the analyzer. Any change that merits re-analysis should be handled by creating a
    /// new instance of this object.
    ///
    /// See the comments at the top of UndoableChange for a list of things that can
    /// mandate code re-analysis.
    /// </summary>
    public class CodeAnalysis {
        /// <summary>
        /// Type hints are specified by the user. They identify a region as being code
        /// or data. The code analyzer will stop at data-hinted regions, and will
        /// process any code-hinted regions during the dead-code pass.
        ///
        /// The hints are not used directly by the data analyzer, but the effects they
        /// have on the Anattrib array are.
        /// </summary>
        public enum TypeHint : sbyte {
            // No hint. This is zero so that it is the default value populated in
            // newly-allocated arrays.
            NoHint = 0,

            // Byte is an instruction. If the code analyzer doesn't find this
            // naturally, it will be scanned.
            Code = 1,

            // Byte is inline data. Execution continues "through" the byte.
            InlineData = 2,

            // Byte is data. Execution halts.
            Data = 3
        }

        ///
        /// Class for handling callbacks from extension scripts.
///
        private class ScriptSupport : MarshalByRefObject, PluginCommon.IApplication {
            // Analyzer that owns this object. Set to null by Shutdown(), after which
            // every callback below becomes a no-op.
            private CodeAnalysis mOuter;

            /// <summary>
            /// Constructor.
            /// </summary>
            /// <param name="ca">Analyzer to which plugin callbacks are forwarded.</param>
            public ScriptSupport(CodeAnalysis ca) {
                mOuter = ca;
            }

            /// <summary>
            /// Call this when analysis is complete, to ensure that over-active scripts
            /// can't keep doing things. (This is not part of IApplication.)
            /// </summary>
            public void Shutdown() {
                mOuter = null;
            }

            /// <summary>
            /// Writes a plugin message to the analyzer's debug log, prefixed with
            /// "PLUGIN: ". Does nothing after Shutdown().
            /// </summary>
            /// <param name="msg">Message to log.</param>
            public void DebugLog(string msg) {
                // Copy to a local so the null check and the use see the same reference.
                CodeAnalysis outer = mOuter;
                if (outer == null) {
                    return;
                }
                outer.mDebugLog.LogI("PLUGIN: " + msg);
            }

            /// <summary>
            /// Forwards an operand format request to the analyzer.
            /// </summary>
            /// <returns>The analyzer's result, or false if Shutdown() has already been
            ///   called.</returns>
            public bool SetOperandFormat(int offset, DataSubType subType, string label) {
                CodeAnalysis outer = mOuter;
                if (outer == null) {
                    // Late call from a script; refuse rather than dereference null.
                    return false;
                }
                return outer.SetOperandFormat(offset, subType, label);
            }

            /// <summary>
            /// Forwards an inline data format request to the analyzer.
            /// </summary>
            /// <returns>The analyzer's result, or false if Shutdown() has already been
            ///   called.</returns>
            public bool SetInlineDataFormat(int offset, int length, DataType type,
                    DataSubType subType, string label) {
                CodeAnalysis outer = mOuter;
                if (outer == null) {
                    // Late call from a script; refuse rather than dereference null.
                    return false;
                }
                return outer.SetInlineDataFormat(offset, length, type, subType, label);
            }
        }

        /// <summary>
        /// Extension script manager.
        /// </summary>
        private ScriptManager mScriptManager;

        /// <summary>
        /// Local object that implements the IApplication interface for plugins.
        /// </summary>
        private ScriptSupport mScriptSupport;

        /// <summary>
        /// List of interesting plugins. If we have plugins that don't do code inlining we
        /// can ignore them. (I'm using an array instead of a List&lt;IPlugin&gt; as a
        /// micro-optimization; see https://stackoverflow.com/a/454923/294248 .)
        /// </summary>
        private IPlugin[] mScriptArray;

        /// <summary>
        /// CPU to use when analyzing data.
        /// </summary>
        private CpuDef mCpuDef;

        /// <summary>
        /// Map of offsets to addresses.
        /// </summary>
        private AddressMap mAddrMap;

        /// <summary>
        /// Reference to 65xx data.
        /// </summary>
        private byte[] mFileData;

        /// <summary>
        /// Attributes, one per byte in input file.
        /// </summary>
        private Anattrib[] mAnattribs;

        /// <summary>
        /// Reference to type hint array, one hint per byte.
        /// </summary>
        private TypeHint[] mTypeHints;

        /// <summary>
        /// Reference to status flag override array, one entry per byte.
        /// </summary>
        private StatusFlags[] mStatusFlagOverrides;

        /// <summary>
        /// Initial status flags to use at entry points.
        /// </summary>
        private StatusFlags mEntryFlags;

        /// <summary>
        /// Debug trace log. Initialized to a silent log; the constructor installs the
        /// log object supplied by the caller.
        /// </summary>
        private DebugLog mDebugLog = new DebugLog(DebugLog.Priority.Silent);

        /// <summary>
        /// Constructor.
        /// </summary>
        /// <param name="data">65xx code stream.</param>
        /// <param name="cpuDef">CPU definition to use when interpreting code.</param>
/// <param name="anattribs">Anattrib array. Expected to be newly allocated, all
        ///   entries set to default values.</param>
        /// <param name="addrMap">Map of offsets to addresses.</param>
        /// <param name="hints">Type hints, one per byte.</param>
        /// <param name="statusFlagOverrides">Status flag overrides for instruction-start
        ///   bytes.</param>
        /// <param name="entryFlags">Status flags to use at code entry points.</param>
        /// <param name="scriptMan">Extension script manager. May be null, in which case
        ///   no extension scripts are used.</param>
        /// <param name="debugLog">Object that receives debug log messages. If null, the
        ///   silent log created by the field initializer is kept.</param>
        public CodeAnalysis(byte[] data, CpuDef cpuDef, Anattrib[] anattribs,
                AddressMap addrMap, TypeHint[] hints, StatusFlags[] statusFlagOverrides,
                StatusFlags entryFlags, ScriptManager scriptMan, DebugLog debugLog) {
            mFileData = data;
            mCpuDef = cpuDef;
            mAnattribs = anattribs;
            mAddrMap = addrMap;
            mTypeHints = hints;
            mStatusFlagOverrides = statusFlagOverrides;
            mEntryFlags = entryFlags;
            mScriptManager = scriptMan;

            // Only replace the default silent log when the caller actually supplied one.
            // Analyze() and the LogX helpers dereference mDebugLog without a null check,
            // so storing null here would make them throw.
            if (debugLog != null) {
                mDebugLog = debugLog;
            }

            // Callback object handed to extension scripts; forwards to this instance.
            mScriptSupport = new ScriptSupport(this);
        }

        // Internal log functions. If we're concerned about performance overhead due to
        // call-site string concatenation, we can #ifdef these to nothing in release builds,
        // which should allow the compiler to elide the concat.
#if false
        private void LogV(int offset, string msg) {
            if (mDebugLog.IsLoggable(DebugLog.Priority.Verbose)) {
                mDebugLog.LogV("+" + offset.ToString("x6") + " " + msg);
            }
        }
#else
        // Verbose logging is compiled out.
        private void LogV(int offset, string msg) { }
#endif
#if true
        // Each of these prefixes the message with "+" and the offset as six lowercase
        // hex digits, and only builds the string when the priority is loggable.
        private void LogD(int offset, string msg) {
            if (mDebugLog.IsLoggable(DebugLog.Priority.Debug)) {
                mDebugLog.LogD("+" + offset.ToString("x6") + " " + msg);
            }
        }
        private void LogI(int offset, string msg) {
            if (mDebugLog.IsLoggable(DebugLog.Priority.Info)) {
                mDebugLog.LogI("+" + offset.ToString("x6") + " " + msg);
            }
        }
        private void LogW(int offset, string msg) {
            if (mDebugLog.IsLoggable(DebugLog.Priority.Warning)) {
                mDebugLog.LogW("+" + offset.ToString("x6") + " " + msg);
            }
        }
        private void LogE(int offset, string msg) {
            if (mDebugLog.IsLoggable(DebugLog.Priority.Error)) {
                mDebugLog.LogE("+" + offset.ToString("x6") + " " + msg);
            }
        }
#else
        private void LogD(int offset, string msg) { }
        private void LogI(int offset, string msg) { }
        private void LogW(int offset, string msg) { }
        private void LogE(int offset, string msg) { }
#endif

        /// <summary>
        /// Analyze a blob of code and data, annotating all code areas.
        ///
        /// Also identifies data embedded in code, e.g. parameter blocks following a JSR,
        /// with the help of extension scripts.
        ///
        /// Failing here can leave us in a strange state, so prefer to work around unexpected
        /// inputs rather than bailing entirely.
        /// </summary>
        public void Analyze() {
            // Work list of offsets still to be scanned; filled in by AnalyzeSegment and
            // drained from the end (LIFO) below.
            List<int> scanOffsets = new List<int>();

            mDebugLog.LogI("Analyzing code: " + mFileData.Length + " bytes, CPU=" +
                mCpuDef.Name);

            PrepareScripts();

            SetAddresses();

            // Set the "is data" and "is inline data" flags on anything that the user has
            // flagged as being such. This tells us to stop processing or skip over bytes
            // as we work. We don't need to flag code hints explicitly for analysis, but
            // we want to be able to display the flags in the info window.
            //
            // The data recognizers may spot additional inline data offsets as we work. This
            // can cause a race if it mis-identifies code that is also a branch target;
            // whichever marks the code first will win.
            UnpackTypeHints();

            // Find starting place, based on type hints.
            // We only set the "visited" flag on the instruction start, so if the user
            // puts a code hint in the middle of an instruction, we will find it and
            // treat it as an entry point. (This is useful for embedded instructions
            // that are branched to by code we aren't able to detect.)
            int searchStart = FindFirstUnvisitedInstruction(0);
            while (searchStart >= 0) {
                // Entry points start with the configured flags, plus any per-offset override.
                mAnattribs[searchStart].IsEntryPoint = true;
                mAnattribs[searchStart].StatusFlags = mEntryFlags;
                mAnattribs[searchStart].ApplyStatusFlags(mStatusFlagOverrides[searchStart]);

                int offset = searchStart;
                while (true) {
                    bool embedded = (mAnattribs[offset].IsInstruction &&
                        !mAnattribs[offset].IsVisited);
                    LogI(offset, "Scan chunk (vis=" + mAnattribs[offset].IsVisited +
                        " chg=" + mAnattribs[offset].IsChanged +
                        (embedded ? " embedded " : "") + ")");

                    AnalyzeSegment(offset, scanOffsets);

                    // Did anything new get added?
                    if (scanOffsets.Count == 0) {
                        break;
                    }

                    // Pop one off the end.
                    int lastItem = scanOffsets.Count - 1;
                    offset = scanOffsets[lastItem];
                    scanOffsets.RemoveAt(lastItem);
                }

                // NOTE(review): the search restarts at the same offset, so this presumably
                // relies on AnalyzeSegment having marked it visited -- confirm.
                searchStart = FindFirstUnvisitedInstruction(searchStart);
            }

            // Stop accepting callbacks from scripts before the final cleanup pass.
            mScriptSupport.Shutdown();

            MarkUnexecutedEmbeddedCode();
        }

        /// <summary>
        /// Prepare a list of relevant extension scripts.
        /// </summary>
        private void PrepareScripts() {
            if (mScriptManager == null) {
                // Currently happens for regression tests with no external files.
                mScriptArray = new IPlugin[0];
                return;
            }

            // Include all scripts.
            mScriptArray = mScriptManager.GetAllInstances().ToArray();

            // Prep them.
            mScriptManager.PrepareScripts(mScriptSupport);
        }

        /// <summary>
        /// Sets the address for every byte in the input.
        /// </summary>
        private void SetAddresses() {
            // The AddressMap will have at least one entry, will start at offset 0, and
            // will exactly span the file.
            foreach (AddressMap.AddressMapEntry ent in mAddrMap) {
                // Addresses increase by one per byte within each map entry.
                int addr = ent.Addr;
                for (int i = ent.Offset; i < ent.Offset + ent.Length; i++) {
                    mAnattribs[i].Address = addr++;
                }
            }
        }

        /// <summary>
        /// Sets the "is xxxxx" flags on type-hinted entries, so that the code analyzer
        /// can find them easily.
        /// </summary>
        private void UnpackTypeHints() {
            Debug.Assert(mTypeHints.Length == mAnattribs.Length);
            int offset = 0;
            foreach (TypeHint hint in mTypeHints) {
                switch (hint) {
                    case TypeHint.Code:
                        // Set the IsInstruction flag to prevent inline data from being
                        // placed here.
                        OpDef op = mCpuDef.GetOpDef(mFileData[offset]);
                        if (op == OpDef.OpInvalid) {
                            // Leaves IsHinted clear, so the entry-point search skips it.
                            LogI(offset, "Ignoring code hint on illegal opcode");
                        } else {
                            mAnattribs[offset].IsHinted = true;
                            mAnattribs[offset].IsInstruction = true;
                        }
                        break;
                    case TypeHint.Data:
                        // Tells the code analyzer to stop. Does not define a data analyzer
                        // "uncategorized data" boundary.
                        mAnattribs[offset].IsHinted = true;
                        mAnattribs[offset].IsData = true;
                        break;
                    case TypeHint.InlineData:
                        // Tells the code analyzer to walk across these.
                        mAnattribs[offset].IsHinted = true;
                        mAnattribs[offset].IsInlineData = true;
                        break;
                    case TypeHint.NoHint:
                        break;
                    default:
                        Debug.Assert(false);
                        break;
                }
                offset++;
            }
        }

        /// <summary>
        /// Finds the first offset that is hinted as code but hasn't yet been visited.
        ///
        /// This might be in the middle of an already-visited instruction.
        /// </summary>
        /// <param name="start">Offset at which to start the search.</param>
        /// <returns>Offset found, or -1 if there is no such offset at or after
        ///   <paramref name="start"/>.</returns>
        private int FindFirstUnvisitedInstruction(int start) {
            for (int i = start; i < mAnattribs.Length; i++) {
                if (mAnattribs[i].IsHinted && mTypeHints[i] == TypeHint.Code &&
                        !mAnattribs[i].IsVisited) {
                    LogD(i, "Unvisited code hint");
                    if (mAnattribs[i].IsData || mAnattribs[i].IsInlineData) {
                        // Maybe the user put a code hint on something that was
                        // later recognized as inline data? Shouldn't have been allowed.
                        LogW(i, "Weird: code hint on data/inline");
                        continue;
                    }
                    return i;
                }
            }
            return -1;
        }

        ///
        /// Finds bits of code that are part of embedded instructions but not actually
        /// executed, and marks them as inline data.
        ///
        private void MarkUnexecutedEmbeddedCode() {
            // The problem arises when you have a line like 4C 60 EA, with a branch to the
            // middle byte. The formatter will print "JMP $EA60", then "