/*
* Copyright 2019 faddenSoft
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Diagnostics;
using Asm65;
using CommonUtil;
using PluginCommon;
using SourceGen.Sandbox;
namespace SourceGen {
/// <summary>
/// Instruction analyzer.
///
/// All data held in this object is transient, and will be discarded when analysis
/// completes. All user-defined values should be held elsewhere and provided as inputs
/// to the analyzer. Any change that merits re-analysis should be handled by creating a
/// new instance of this object.
///
/// See the comments at the top of UndoableChange for a list of things that can
/// mandate code re-analysis.
/// </summary>
/// <remarks>
/// This invokes methods in extension scripts to handle things like inline data
/// following a JSR. The added cost is generally low, because the AppDomain security
/// sandbox doesn't add a lot of overhead. Unfortunately this approach is deprecated
/// by Microsoft and may break or become unavailable. If that happens, and we have to
/// switch to a sandbox approach with significant overhead, we will most likely want
/// to move the code analyzer itself into the sandbox.
///
/// For this reason it's best to minimize direct interaction between the code here and
/// that elsewhere in the program.
/// </remarks>
public class CodeAnalysis {
/// <summary>
/// User-specified analyzer tags. Each one marks an offset as the start or end of an
/// executable code region, or as part of an inline data block.
///
/// The data analyzer does not consume the tags directly; it only sees the effects
/// they have on the Anattrib array.
/// </summary>
/// <remarks>
/// THESE VALUES ARE SERIALIZED to the project data file. They cannot be renamed
/// without writing a translator in ProjectFile.
/// </remarks>
public enum AnalyzerTag : sbyte {
    /// <summary>No tag. This is the default value populated in new arrays.</summary>
    None = 0,

    /// <summary>
    /// Byte is an instruction. If the code analyzer doesn't find this naturally,
    /// it will be scanned.
    /// </summary>
    Code = 1,

    /// <summary>Byte is inline data. Execution skips over the byte.</summary>
    InlineData = 2,

    /// <summary>Byte is data. Execution halts.</summary>
    Data = 3,
}
/// <summary>
/// Class for handling callbacks from extension scripts. Each IApplication call is
/// forwarded to the owning CodeAnalysis instance.
/// </summary>
/// <remarks>
/// Once <see cref="Shutdown"/> has been called the reference to the owner is dropped.
/// Callbacks that arrive after that point are ignored (format requests return false)
/// instead of dereferencing a null owner and throwing a NullReferenceException into
/// the calling plugin.
/// </remarks>
private class ScriptSupport : MarshalByRefObject, PluginCommon.IApplication {
    // Owning analyzer. Set to null by Shutdown().
    private CodeAnalysis mOuter;

    /// <summary>
    /// Constructor.
    /// </summary>
    /// <param name="ca">Analyzer that will service the plugin callbacks.</param>
    public ScriptSupport(CodeAnalysis ca) {
        mOuter = ca;
    }

    /// <summary>
    /// Call this when analysis is complete, to ensure that over-active scripts
    /// can't keep doing things. (This is not part of IApplication.)
    /// </summary>
    public void Shutdown() {
        mOuter = null;
    }

    /// <summary>
    /// Reports an error from a plugin. Handled the same way as a debug log message.
    /// </summary>
    /// <param name="msg">Message text.</param>
    public void ReportError(string msg) {
        DebugLog(msg);
    }

    /// <summary>
    /// Writes a plugin message to the analyzer's debug log at "info" priority,
    /// prefixed with "PLUGIN: ". Does nothing after Shutdown().
    /// </summary>
    /// <param name="msg">Message text.</param>
    public void DebugLog(string msg) {
        // Copy to a local so the null check and the use see the same reference.
        CodeAnalysis outer = mOuter;
        if (outer == null) {
            return;
        }
        outer.mDebugLog.LogI("PLUGIN: " + msg);
    }

    /// <summary>
    /// Forwards an operand format request to the analyzer.
    /// </summary>
    /// <returns>The analyzer's result, or false if Shutdown() has been called.</returns>
    public bool SetOperandFormat(int offset, DataSubType subType, string label) {
        CodeAnalysis outer = mOuter;
        if (outer == null) {
            return false;
        }
        return outer.SetOperandFormat(offset, subType, label);
    }

    /// <summary>
    /// Forwards an inline data format request to the analyzer.
    /// </summary>
    /// <returns>The analyzer's result, or false if Shutdown() has been called.</returns>
    public bool SetInlineDataFormat(int offset, int length, DataType type,
            DataSubType subType, string label) {
        CodeAnalysis outer = mOuter;
        if (outer == null) {
            return false;
        }
        return outer.SetInlineDataFormat(offset, length, type, subType, label);
    }
}
/// <summary>
/// Extension script manager. May be null, in which case no scripts are used.
/// </summary>
private ScriptManager mScriptManager;
/// <summary>
/// Local object that implements the IApplication interface for plugins.
/// </summary>
private ScriptSupport mScriptSupport;
/// <summary>
/// List of interesting plugins. If we have plugins that don't do code inlining we
/// can ignore them. (I'm using an array instead of a generic List of IPlugin as a
/// micro-optimization; see https://stackoverflow.com/a/454923/294248 .)
/// </summary>
private IPlugin[] mScriptArray;
// Capability flags for a plugin: which inline-call interfaces (JSR / JSL / BRK)
// it implements. mPluginCaps[i] holds the flags for mScriptArray[i].
[Flags]
private enum PluginCap { NONE = 0, JSR = 1 << 0, JSL = 1 << 1, BRK = 1 << 2 };
private PluginCap[] mPluginCaps;
/// <summary>
/// CPU to use when analyzing data.
/// </summary>
private CpuDef mCpuDef;
/// <summary>
/// Map of offsets to addresses.
/// </summary>
private AddressMap mAddrMap;
/// <summary>
/// Reference to 65xx data.
/// </summary>
private byte[] mFileData;
/// <summary>
/// Attributes, one per byte in input file.
/// </summary>
private Anattrib[] mAnattribs;
/// <summary>
/// Reference to analyzer tag array, one entry per byte.
/// </summary>
private AnalyzerTag[] mAnalyzerTags;
/// <summary>
/// Reference to status flag override array, one entry per byte.
/// </summary>
private StatusFlags[] mStatusFlagOverrides;
/// <summary>
/// Initial status flags to use at entry points.
/// </summary>
private StatusFlags mEntryFlags;
/// <summary>
/// User-configurable analysis parameters.
/// </summary>
private ProjectProperties.AnalysisParameters mAnalysisParameters;
/// <summary>
/// Debug trace log. Starts out as a silent log; replaced in the constructor.
/// </summary>
private DebugLog mDebugLog = new DebugLog(DebugLog.Priority.Silent);
/// <summary>
/// Constructor.
/// </summary>
/// <param name="data">65xx code stream.</param>
/// <param name="cpuDef">CPU definition to use when interpreting code.</param>
/// <param name="anattribs">Anattrib array. Expected to be newly allocated, all
///   entries set to default values.</param>
/// <param name="addrMap">Map of offsets to addresses.</param>
/// <param name="atags">Analyzer tags, one per byte.</param>
/// <param name="statusFlagOverrides">Status flag overrides for instruction-start
///   bytes.</param>
/// <param name="entryFlags">Status flags to use at code entry points.</param>
/// <param name="parms">Analysis parameters.</param>
/// <param name="scriptMan">Extension script manager. May be null, in which case
///   no extension scripts are used.</param>
/// <param name="debugLog">Object that receives debug log messages. If null, the
///   default silent log is kept.</param>
public CodeAnalysis(byte[] data, CpuDef cpuDef, Anattrib[] anattribs,
        AddressMap addrMap, AnalyzerTag[] atags, StatusFlags[] statusFlagOverrides,
        StatusFlags entryFlags, ProjectProperties.AnalysisParameters parms,
        ScriptManager scriptMan, DebugLog debugLog) {
    mFileData = data;
    mCpuDef = cpuDef;
    mAnattribs = anattribs;
    mAddrMap = addrMap;
    mAnalyzerTags = atags;
    mStatusFlagOverrides = statusFlagOverrides;
    mEntryFlags = entryFlags;
    mScriptManager = scriptMan;
    mAnalysisParameters = parms;

    // The field initializer supplies a silent log. Only replace it when the caller
    // actually provided one, so the logging helpers never dereference null.
    if (debugLog != null) {
        mDebugLog = debugLog;
    }

    mScriptSupport = new ScriptSupport(this);
}
// Internal log helpers. Each one prefixes the message with "+" and the offset as six
// lower-case hex digits. If call-site string concatenation ever becomes a performance
// concern, the #if blocks below can compile these down to empty methods in release
// builds, which should allow the compiler to elide the concat.

// Verbose logging is currently compiled out.
#if false
private void LogV(int offset, string msg) {
    if (!mDebugLog.IsLoggable(DebugLog.Priority.Verbose)) {
        return;
    }
    mDebugLog.LogV("+" + offset.ToString("x6") + " " + msg);
}
#else
private void LogV(int offset, string msg) { }
#endif

// Debug / Info / Warning / Error logging is currently compiled in.
#if true
private void LogD(int offset, string msg) {
    if (!mDebugLog.IsLoggable(DebugLog.Priority.Debug)) {
        return;
    }
    mDebugLog.LogD("+" + offset.ToString("x6") + " " + msg);
}
private void LogI(int offset, string msg) {
    if (!mDebugLog.IsLoggable(DebugLog.Priority.Info)) {
        return;
    }
    mDebugLog.LogI("+" + offset.ToString("x6") + " " + msg);
}
private void LogW(int offset, string msg) {
    if (!mDebugLog.IsLoggable(DebugLog.Priority.Warning)) {
        return;
    }
    mDebugLog.LogW("+" + offset.ToString("x6") + " " + msg);
}
private void LogE(int offset, string msg) {
    if (!mDebugLog.IsLoggable(DebugLog.Priority.Error)) {
        return;
    }
    mDebugLog.LogE("+" + offset.ToString("x6") + " " + msg);
}
#else
private void LogD(int offset, string msg) { }
private void LogI(int offset, string msg) { }
private void LogW(int offset, string msg) { }
private void LogE(int offset, string msg) { }
#endif
/// <summary>
/// Analyze a blob of code and data, annotating all code areas.
///
/// Also identifies data embedded in code, e.g. parameter blocks following a JSR,
/// with the help of extension scripts.
/// </summary>
/// <remarks>
/// Failing here can leave us in a strange state, so prefer to work around unexpected
/// inputs rather than bailing entirely.
/// </remarks>
public void Analyze() {
    // Work list of offsets still to be scanned. AnalyzeSegment() adds entries; the
    // loop below removes them from the end, so it behaves as a stack.
    List<int> scanOffsets = new List<int>();

    mDebugLog.LogI("Analyzing code: " + mFileData.Length + " bytes, CPU=" + mCpuDef.Name);

    PrepareScripts();
    SetAddresses();

    // Set values in the anattrib array based on the user-specified analyzer tags.
    // This tells us to stop processing or skip over bytes as we work. We set values
    // for the code start tags so we can show them in the "info" window.
    //
    // The data recognizers may spot additional inline data offsets as we work. This
    // can cause a race if it mis-identifies code that is also a branch target;
    // whichever marks the code first will win.
    UnpackAnalyzerTags();

    // Find starting place, based on analyzer tags.
    //
    // We only set the "visited" flag on the instruction start, so if the user
    // puts a code start in the middle of an instruction, we will find it and
    // treat it as an entry point. (This is useful for embedded instructions
    // that are branched to by code we aren't able to detect.)
    int searchStart = FindFirstUnvisitedInstruction(0);
    while (searchStart >= 0) {
        // Each tagged start is an entry point: begin with the project's entry flags,
        // then apply any user override for this offset.
        mAnattribs[searchStart].IsEntryPoint = true;
        mAnattribs[searchStart].StatusFlags = mEntryFlags;
        mAnattribs[searchStart].ApplyStatusFlags(mStatusFlagOverrides[searchStart]);

        int offset = searchStart;
        while (true) {
            bool embedded = (mAnattribs[offset].IsInstruction &&
                !mAnattribs[offset].IsVisited);
            LogI(offset, "Scan chunk (vis=" + mAnattribs[offset].IsVisited +
                " chg=" + mAnattribs[offset].IsChanged +
                (embedded ? " embedded " : "") + ")");

            AnalyzeSegment(offset, scanOffsets);

            // Did anything new get added?
            if (scanOffsets.Count == 0) {
                break;
            }

            // Pop one off the end.
            int lastItem = scanOffsets.Count - 1;
            offset = scanOffsets[lastItem];
            scanOffsets.RemoveAt(lastItem);
        }

        // Resume the search from the entry point we just finished with.
        searchStart = FindFirstUnvisitedInstruction(searchStart);
    }

    // Detach from the extension scripts before the final pass.
    if (mScriptManager != null) {
        mScriptManager.UnprepareScripts();
    }
    mScriptSupport.Shutdown();

    MarkUnexecutedEmbeddedCode();
}
/// <summary>
/// Builds the plugin array and the parallel capability array, then tells the
/// script manager to prepare the scripts for use.
/// </summary>
private void PrepareScripts() {
    if (mScriptManager == null) {
        // Currently happens for regression tests with no external files.
        mScriptArray = new IPlugin[0];
        mPluginCaps = new PluginCap[0];
        return;
    }

    // Every script instance is included; the capability flags record which of the
    // inline-call interfaces each one implements.
    mScriptArray = mScriptManager.GetAllInstances().ToArray();
    mPluginCaps = new PluginCap[mScriptArray.Length];

    int index = 0;
    foreach (IPlugin plugin in mScriptArray) {
        PluginCap found = PluginCap.NONE;
        found |= (plugin is IPlugin_InlineJsr) ? PluginCap.JSR : PluginCap.NONE;
        found |= (plugin is IPlugin_InlineJsl) ? PluginCap.JSL : PluginCap.NONE;
        found |= (plugin is IPlugin_InlineBrk) ? PluginCap.BRK : PluginCap.NONE;
        mPluginCaps[index] = found;
        index++;
    }

    // Hand the scripts our IApplication implementation.
    mScriptManager.PrepareScripts(mScriptSupport);
}
/// <summary>
/// Sets the address for every byte in the input.
/// </summary>
/// <remarks>
/// Walks the address map's change events in step with the file offsets. For each
/// offset this fills in Address, IsAddrRegionChange and IsNonAddressable in the
/// Anattrib array. The address increments by one per byte between change events.
/// </remarks>
private void SetAddresses() {
    IEnumerator<AddressMap.AddressChange> addrIter = mAddrMap.AddressChangeIterator;

    // IEnumerator.Current is undefined once MoveNext() has returned false, so track
    // exhaustion explicitly: "change" is null when there are no more events.
    AddressMap.AddressChange change = addrIter.MoveNext() ? addrIter.Current : null;

    int addr = 0;
    bool nonAddr = false;
    bool addrChange = false;

    for (int offset = 0; offset < mAnattribs.Length; offset++) {
        // Process all start events at this offset. The new address takes effect
        // immediately.
        while (change != null && change.IsStart && change.Offset == offset) {
            addr = change.Address;
            nonAddr = (addr == Address.NON_ADDR);
            if (nonAddr) {
                // Non-addressable regions are numbered from zero.
                addr = 0;
            }
            addrChange = true;
            change = addrIter.MoveNext() ? addrIter.Current : null;
        }

        mAnattribs[offset].Address = addr++;
        mAnattribs[offset].IsAddrRegionChange = addrChange;
        mAnattribs[offset].IsNonAddressable = nonAddr;
        addrChange = false;

        // Process all end events at this offset. The new address and "address
        // region change" flag take effect on the *following* offset.
        while (change != null && !change.IsStart && change.Offset == offset) {
            addr = change.Address;
            nonAddr = (addr == Address.NON_ADDR);
            if (nonAddr) {
                addr = 0;
            }
            addrChange = true;
            change = addrIter.MoveNext() ? addrIter.Current : null;
        }
    }
}
/// <summary>
/// Copies the user's analyzer tags into the per-byte "is xxxxx" flags of the
/// Anattrib array, so that the code analyzer can find them easily.
/// </summary>
private void UnpackAnalyzerTags() {
    Debug.Assert(mAnalyzerTags.Length == mAnattribs.Length);

    for (int offset = 0; offset < mAnalyzerTags.Length; offset++) {
        AnalyzerTag atag = mAnalyzerTags[offset];

        if (atag == AnalyzerTag.None) {
            // Untagged byte, nothing to record.
            continue;
        }

        if (atag == AnalyzerTag.Code) {
            // Set the IsInstruction flag to prevent inline data from being
            // placed here.
            OpDef op = mCpuDef.GetOpDef(mFileData[offset]);
            if (op == OpDef.OpInvalid) {
                // Might want to set the "has tag" value anyway, since it won't
                // appear in the "Info" window if we don't. Or maybe we need a
                // message about "invisible" code start tags?
                LogI(offset, "Ignoring code start tag on illegal opcode");
            } else {
                mAnattribs[offset].HasAnalyzerTag = true;
                mAnattribs[offset].IsInstruction = true;
            }
        } else if (atag == AnalyzerTag.Data) {
            // Tells the code analyzer to stop.
            mAnattribs[offset].HasAnalyzerTag = true;
            mAnattribs[offset].IsData = true;
        } else if (atag == AnalyzerTag.InlineData) {
            // Tells the code analyzer to walk across these.
            mAnattribs[offset].HasAnalyzerTag = true;
            mAnattribs[offset].IsInlineData = true;
        } else {
            // Unknown tag value.
            Debug.Assert(false);
        }
    }
}
/// <summary>
/// Finds the first offset that is tagged as code start but hasn't yet been visited.
///
/// This might be in the middle of an already-visited instruction.
/// </summary>
/// <param name="start">Offset at which to start the search.</param>
/// <returns>Offset found, or -1 if there is no such offset.</returns>
private int FindFirstUnvisitedInstruction(int start) {
    for (int offset = start; offset < mAnattribs.Length; offset++) {
        bool isCodeStart = mAnattribs[offset].HasAnalyzerTag &&
            mAnalyzerTags[offset] == AnalyzerTag.Code;
        if (!isCodeStart || mAnattribs[offset].IsVisited) {
            continue;
        }

        LogD(offset, "Unvisited code start tag");

        bool taggedAsData = mAnattribs[offset].IsData || mAnattribs[offset].IsInlineData;
        if (!taggedAsData) {
            return offset;
        }

        // Maybe the user put a code start tag on something that was
        // later recognized as inline data? Shouldn't have been allowed.
        LogW(offset, "Weird: code start tag on data/inline");
    }
    return -1;
}
///
/// Finds bits of code that are part of embedded instructions but not actually
/// executed, and marks them as inline data.
///
private void MarkUnexecutedEmbeddedCode() {
// The problem arises when you have a line like 4C 60 EA, with a branch to the
// middle byte. The formatter will print "JMP $EA60", then "