/* * Copyright 2019 faddenSoft * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using System.Collections.Generic; using System.Diagnostics; using Asm65; using CommonUtil; using PluginCommon; using SourceGen.Sandbox; namespace SourceGen { /// /// Instruction analyzer. /// /// All data held in this object is transient, and will be discarded when analysis /// completes. All user-defined values should be held elsewhere and provided as inputs /// to the analyzer. Any change that merits re-analysis should be handled by creating a /// new instance of this object. /// /// See the comments at the top of UndoableChange for a list of things that can /// mandate code re-analysis. /// /// /// This invokes methods in extension scripts to handle things like inline data /// following a JSR. The added cost is generally low, because the AppDomain security /// sandbox doesn't add a lot of overhead. Unfortunately this approach is deprecated /// by Microsoft and may break or become unavailable. If that happens, and we have to /// switch to a sandbox approach with significant overhead, we will most likely want /// to move the code analyzer itself into the sandbox. /// /// For this reason it's best to minimize direct interaction between the code here and /// that elsewhere in the program. /// public class CodeAnalysis { /// /// Analyzer tags are specified by the user. They identify an offset as being the /// start or end of an executable code region, or part of an inline data block. 
///
/// The tags are not used directly by the data analyzer, but the effects they
/// have on the Anattrib array are.
///
/// <remarks>
/// THESE VALUES ARE SERIALIZED to the project data file. They cannot be renamed
/// without writing a translator in ProjectFile.
/// </remarks>
public enum AnalyzerTag : sbyte {
    // No tag. Default value populated in new arrays.
    None = 0,

    // Byte is an instruction. If the code analyzer doesn't find this
    // naturally, it will be scanned.
    Code,

    // Byte is inline data. Execution skips over the byte.
    InlineData,

    // Byte is data. Execution halts.
    Data
}

/// <summary>
/// Class for handling callbacks from extension scripts.
/// </summary>
/// <remarks>
/// Every callback is forwarded to the owning CodeAnalysis instance. The reference to
/// the owner is cleared by Shutdown(); the forwarding methods dereference it without
/// a null check.
/// </remarks>
private class ScriptSupport : MarshalByRefObject, PluginCommon.IApplication {
    // Owning analyzer. Set to null by Shutdown().
    private CodeAnalysis mOuter;

    /// <summary>
    /// Constructor.
    /// </summary>
    /// <param name="ca">Analyzer that callbacks are forwarded to.</param>
    public ScriptSupport(CodeAnalysis ca) {
        mOuter = ca;
    }

    /// <summary>
    /// Call this when analysis is complete, to ensure that over-active scripts
    /// can't keep doing things. (This is not part of IApplication.)
    /// </summary>
    public void Shutdown() {
        mOuter = null;
    }

    /// <summary>
    /// Reports an error from a script. Forwards the message to DebugLog().
    /// </summary>
    /// <param name="msg">Message text.</param>
    public void ReportError(string msg) {
        DebugLog(msg);
    }

    /// <summary>
    /// Writes a script message to the analyzer's debug log via LogI, prefixed
    /// with "PLUGIN: ".
    /// </summary>
    /// <param name="msg">Message text.</param>
    public void DebugLog(string msg) {
        mOuter.mDebugLog.LogI("PLUGIN: " + msg);
    }

    /// <summary>
    /// Forwards to the analyzer's SetOperandFormat() and returns its result.
    /// </summary>
    public bool SetOperandFormat(int offset, DataSubType subType, string label) {
        return mOuter.SetOperandFormat(offset, subType, label);
    }

    /// <summary>
    /// Forwards to the analyzer's SetInlineDataFormat() and returns its result.
    /// </summary>
    public bool SetInlineDataFormat(int offset, int length, DataType type,
            DataSubType subType, string label) {
        return mOuter.SetInlineDataFormat(offset, length, type, subType, label);
    }
}

/// <summary>
/// Extension script manager.
/// </summary>
private ScriptManager mScriptManager;

/// <summary>
/// Local object that implements the IApplication interface for plugins.
/// </summary>
private ScriptSupport mScriptSupport;

/// <summary>
/// List of interesting plugins. If we have plugins that don't do code inlining we
/// can ignore them. (I'm using an array instead of a List&lt;IPlugin&gt; as a
/// micro-optimization; see https://stackoverflow.com/a/454923/294248 .)
/// </summary>
private IPlugin[] mScriptArray;

// Capability flags for each plugin; filled in by PrepareScripts(), one entry per
// element of mScriptArray.
[Flags]
private enum PluginCap { NONE = 0, JSR = 1 << 0, JSL = 1 << 1, BRK = 1 << 2 };
private PluginCap[] mPluginCaps;

/// <summary>
/// CPU to use when analyzing data.
/// </summary>
private CpuDef mCpuDef;

/// <summary>
/// Map of offsets to addresses.
/// </summary>
private AddressMap mAddrMap;

/// <summary>
/// Reference to 65xx data.
/// </summary>
private byte[] mFileData;

/// <summary>
/// Attributes, one per byte in input file.
/// </summary>
private Anattrib[] mAnattribs;

/// <summary>
/// Reference to analyzer tag array, one entry per byte.
/// </summary>
private AnalyzerTag[] mAnalyzerTags;

/// <summary>
/// Reference to status flag override array, one entry per byte.
/// </summary>
private StatusFlags[] mStatusFlagOverrides;

/// <summary>
/// Initial status flags to use at entry points.
/// </summary>
private StatusFlags mEntryFlags;

/// <summary>
/// User-configurable analysis parameters.
/// </summary>
private ProjectProperties.AnalysisParameters mAnalysisParameters;

/// <summary>
/// Debug trace log.
/// </summary>
private DebugLog mDebugLog = new DebugLog(DebugLog.Priority.Silent);


/// <summary>
/// Constructor.
/// </summary>
/// <param name="data">65xx code stream.</param>
/// <param name="cpuDef">CPU definition to use when interpreting code.</param>
/// <param name="anattribs">Anattrib array. Expected to be newly allocated, all
///   entries set to default values.</param>
/// <param name="addrMap">Map of offsets to addresses.</param>
/// <param name="atags">Analyzer tags, one per byte.</param>
/// <param name="statusFlagOverrides">Status flag overrides for instruction-start
///   bytes.</param>
/// <param name="entryFlags">Status flags to use at code entry points.</param>
/// <param name="parms">Analysis parameters.</param>
/// <param name="scriptMan">Extension script manager.</param>
/// <param name="debugLog">Object that receives debug log messages.</param>
public CodeAnalysis(byte[] data, CpuDef cpuDef, Anattrib[] anattribs,
        AddressMap addrMap, AnalyzerTag[] atags, StatusFlags[] statusFlagOverrides,
        StatusFlags entryFlags, ProjectProperties.AnalysisParameters parms,
        ScriptManager scriptMan, DebugLog debugLog) {
    mFileData = data;
    mCpuDef = cpuDef;
    mAnattribs = anattribs;
    mAddrMap = addrMap;
    mAnalyzerTags = atags;
    mStatusFlagOverrides = statusFlagOverrides;
    mEntryFlags = entryFlags;
    mScriptManager = scriptMan;
    mAnalysisParameters = parms;
    mDebugLog = debugLog;

    // Callback object handed to the extension scripts by PrepareScripts().
    mScriptSupport = new ScriptSupport(this);
}

// Internal log functions. If we're concerned about performance overhead due to
// call-site string concatenation, we can #ifdef these to nothing in release builds,
// which should allow the compiler to elide the concat.
//
// Each message is prefixed with "+" and the file offset as six hex digits.
#if false
private void LogV(int offset, string msg) {
    if (mDebugLog.IsLoggable(DebugLog.Priority.Verbose)) {
        mDebugLog.LogV("+" + offset.ToString("x6") + " " + msg);
    }
}
#else
private void LogV(int offset, string msg) { }
#endif
#if true
private void LogD(int offset, string msg) {
    if (mDebugLog.IsLoggable(DebugLog.Priority.Debug)) {
        mDebugLog.LogD("+" + offset.ToString("x6") + " " + msg);
    }
}
private void LogI(int offset, string msg) {
    if (mDebugLog.IsLoggable(DebugLog.Priority.Info)) {
        mDebugLog.LogI("+" + offset.ToString("x6") + " " + msg);
    }
}
private void LogW(int offset, string msg) {
    if (mDebugLog.IsLoggable(DebugLog.Priority.Warning)) {
        mDebugLog.LogW("+" + offset.ToString("x6") + " " + msg);
    }
}
private void LogE(int offset, string msg) {
    if (mDebugLog.IsLoggable(DebugLog.Priority.Error)) {
        mDebugLog.LogE("+" + offset.ToString("x6") + " " + msg);
    }
}
#else
private void LogD(int offset, string msg) { }
private void LogI(int offset, string msg) { }
private void LogW(int offset, string msg) { }
private void LogE(int offset, string msg) { }
#endif

/// <summary>
/// Analyze a blob of code and data, annotating all code areas.
///
/// Also identifies data embedded in code, e.g. parameter blocks following a JSR,
/// with the help of extension scripts.
/// </summary>
/// <remarks>
/// Failing here can leave us in a strange state, so prefer to work around unexpected
/// inputs rather than bailing entirely.
/// </remarks>
public void Analyze() {
    // Work list of offsets still to be scanned; AnalyzeSegment() receives it on
    // every call and the loop below pops from the end.
    List<int> scanOffsets = new List<int>();

    mDebugLog.LogI("Analyzing code: " + mFileData.Length + " bytes, CPU=" +
        mCpuDef.Name);

    PrepareScripts();

    SetAddresses();

    // Set values in the anattrib array based on the user-specified analyzer tags.
    // This tells us to stop processing or skip over bytes as we work. We set values
    // for the code start tags so we can show them in the "info" window.
    //
    // The data recognizers may spot additional inline data offsets as we work. This
    // can cause a race if it mis-identifies code that is also a branch target;
    // whichever marks the code first will win.
    UnpackAnalyzerTags();

    // Find starting place, based on analyzer tags.
    //
    // We only set the "visited" flag on the instruction start, so if the user
    // puts a code start in the middle of an instruction, we will find it and
    // treat it as an entry point. (This is useful for embedded instructions
    // that are branched to by code we aren't able to detect.)
    int searchStart = FindFirstUnvisitedInstruction(0);
    while (searchStart >= 0) {
        mAnattribs[searchStart].IsEntryPoint = true;
        mAnattribs[searchStart].StatusFlags = mEntryFlags;
        mAnattribs[searchStart].ApplyStatusFlags(mStatusFlagOverrides[searchStart]);

        int offset = searchStart;
        while (true) {
            bool embedded = (mAnattribs[offset].IsInstruction &&
                    !mAnattribs[offset].IsVisited);
            LogI(offset, "Scan chunk (vis=" + mAnattribs[offset].IsVisited +
                " chg=" + mAnattribs[offset].IsChanged +
                (embedded ? " embedded " : "") + ")");

            AnalyzeSegment(offset, scanOffsets);

            // Did anything new get added?
            if (scanOffsets.Count == 0) {
                break;
            }

            // Pop one off the end.
            int lastItem = scanOffsets.Count - 1;
            offset = scanOffsets[lastItem];
            scanOffsets.RemoveAt(lastItem);
        }

        // Resume the search from the entry point we just processed.
        searchStart = FindFirstUnvisitedInstruction(searchStart);
    }

    if (mScriptManager != null) {
        mScriptManager.UnprepareScripts();
    }
    mScriptSupport.Shutdown();

    MarkUnexecutedEmbeddedCode();
}

/// <summary>
/// Prepare a list of relevant extension scripts.
/// </summary>
/// <remarks>
/// Fills mScriptArray and the parallel mPluginCaps array, then hands the callback
/// object to the script manager. With no script manager both arrays are left empty.
/// </remarks>
private void PrepareScripts() {
    if (mScriptManager == null) {
        // Currently happens for regression tests with no external files.
        mScriptArray = new IPlugin[0];
        mPluginCaps = new PluginCap[0];
        return;
    }

    // Include all scripts.
    mScriptArray = mScriptManager.GetAllInstances().ToArray();
    mPluginCaps = new PluginCap[mScriptArray.Length];
    for (int i = 0; i < mScriptArray.Length; i++) {
        // Record which inline-handling interfaces this plugin implements.
        PluginCap cap = PluginCap.NONE;
        if (mScriptArray[i] is IPlugin_InlineJsr) {
            cap |= PluginCap.JSR;
        }
        if (mScriptArray[i] is IPlugin_InlineJsl) {
            cap |= PluginCap.JSL;
        }
        if (mScriptArray[i] is IPlugin_InlineBrk) {
            cap |= PluginCap.BRK;
        }
        mPluginCaps[i] = cap;
    }

    // Prep them.
    mScriptManager.PrepareScripts(mScriptSupport);
}

/// <summary>
/// Sets the address for every byte in the input.
/// </summary>
/// <remarks>
/// Walks the address map's change events in step with the file offset, writing
/// Address, IsAddrRegionChange and IsNonAddressable into each Anattrib entry.
/// </remarks>
private void SetAddresses() {
    IEnumerator<AddressMap.AddressChange> addrIter = mAddrMap.AddressChangeIterator;
    addrIter.MoveNext();
    int addr = 0;
    bool nonAddr = false;
    bool addrChange = false;

    for (int offset = 0; offset < mAnattribs.Length; offset++) {
        AddressMap.AddressChange change = addrIter.Current;

        // Process all start events at this offset. The new address takes effect
        // immediately.
        while (change != null && change.IsStart && change.Offset == offset) {
            addr = change.Address;
            if (addr == Address.NON_ADDR) {
                // Non-addressable region: count up from zero and flag the bytes.
                addr = 0;
                nonAddr = true;
            } else {
                nonAddr = false;
            }
            addrChange = true;
            addrIter.MoveNext();
            change = addrIter.Current;
        }

        mAnattribs[offset].Address = addr++;
        mAnattribs[offset].IsAddrRegionChange = addrChange;
        mAnattribs[offset].IsNonAddressable = nonAddr;
        addrChange = false;

        // Process all end events at this offset. The new address and "address
        // region change" flag take effect on the *following* offset.
        while (change != null && !change.IsStart && change.Offset == offset) {
            addr = change.Address;
            if (addr == Address.NON_ADDR) {
                addr = 0;
                nonAddr = true;
            } else {
                nonAddr = false;
            }
            addrChange = true;
            addrIter.MoveNext();
            change = addrIter.Current;
        }
    }
}

/// <summary>
/// Sets the "is xxxxx" flags on analyzer-tagged entries, so that the code analyzer
/// can find them easily.
/// </summary>
private void UnpackAnalyzerTags() {
    Debug.Assert(mAnalyzerTags.Length == mAnattribs.Length);
    int offset = 0;
    foreach (AnalyzerTag atag in mAnalyzerTags) {
        switch (atag) {
            case AnalyzerTag.Code:
                // Set the IsInstruction flag to prevent inline data from being
                // placed here.
                OpDef op = mCpuDef.GetOpDef(mFileData[offset]);
                if (op == OpDef.OpInvalid) {
                    // Might want to set the "has tag" value anyway, since it won't
                    // appear in the "Info" window if we don't. Or maybe we need a
                    // message about "invisible" code start tags?
                    LogI(offset, "Ignoring code start tag on illegal opcode");
                } else {
                    mAnattribs[offset].HasAnalyzerTag = true;
                    mAnattribs[offset].IsInstruction = true;
                }
                break;
            case AnalyzerTag.Data:
                // Tells the code analyzer to stop.
                mAnattribs[offset].HasAnalyzerTag = true;
                mAnattribs[offset].IsData = true;
                break;
            case AnalyzerTag.InlineData:
                // Tells the code analyzer to walk across these.
                mAnattribs[offset].HasAnalyzerTag = true;
                mAnattribs[offset].IsInlineData = true;
                break;
            case AnalyzerTag.None:
                break;
            default:
                Debug.Assert(false);
                break;
        }
        offset++;
    }
}

/// <summary>
/// Finds the first offset that is tagged as code start but hasn't yet been visited.
///
/// This might be in the middle of an already-visited instruction.
/// </summary>
/// <param name="start">Offset at which to start the search.</param>
/// <returns>Offset found, or -1 if there is none.</returns>
private int FindFirstUnvisitedInstruction(int start) {
    // Scan forward from "start" for an entry that carries a Code analyzer tag and
    // has not been visited yet. Returns -1 when the end of the array is reached.
    for (int i = start; i < mAnattribs.Length; i++) {
        if (mAnattribs[i].HasAnalyzerTag && mAnalyzerTags[i] == AnalyzerTag.Code &&
                !mAnattribs[i].IsVisited) {
            LogD(i, "Unvisited code start tag");

            if (mAnattribs[i].IsData || mAnattribs[i].IsInlineData) {
                // Maybe the user put a code start tag on something that was
                // later recognized as inline data? Shouldn't have been allowed.
                LogW(i, "Weird: code start tag on data/inline");
                continue;
            }
            return i;
        }
    }
    return -1;
}

/// /// Finds bits of code that are part of embedded instructions but not actually /// executed, and marks them as inline data. /// private void MarkUnexecutedEmbeddedCode() { // The problem arises when you have a line like 4C 60 EA, with a branch to the // middle byte. The formatter will print "JMP $EA60", then "