/* * Copyright 2019 faddenSoft * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using System.Collections.Generic; using System.Diagnostics; namespace SourceGen { /// /// Given a list of LocalVariableTables, this determines the mapping of values to symbols /// at a specific offset. /// /// /// We guarantee that the label will be unique within its scope. This happens at two /// different levels: /// (1) If the local variable label is present in the main symbol table, we use the /// "de-duplication" table to remap it. We try not to let this happen, but it can. /// The symbol table is latched when the object is constructed. /// (2) If the assembler doesn't define a way to re-use variable names, we make them /// globally unique. [currently not needed] /// /// De-duplication changes the label in *most* circumstances. We want to show the de-dup /// form everywhere except for the LvTable editor and the Lv editor. Using the correct /// string at the correct time is necessary for some basic editor operations: /// - Double-click on the opcode of LDA [lvar]. The selection should jump to the specific /// entry in the LvTable. /// - Double-click on the operand of LDA [lvar]. The instruction operand editor should /// open and offer to edit the de-dup form. Clicking on the shortcut button should open /// the Lv editor, with the original label shown (and perhaps a warning about /// non-uniqueness). /// - Double-click on the LvTable. The table listing should show the original label. /// - Click on a de-duped entry and verify that the correct cross-references exist. /// /// To reduce confusion, the fact that something has been de-duped should be obvious. /// /// A few quick clicks in the 2019-local-variables test should confirm these. /// public class LocalVariableLookup { /// /// List of tables. The table's file offset is used as the key. /// private SortedList mLvTables; /// /// Reference to project, so we can query the Anattrib array to identify "hidden" tables. /// private DisasmProject mProject; /// /// Set to true when generating symbols for assemblers like 64tass, which assign a /// special meaning to labels with leading underscores. /// private bool mMaskLeadingUnderscores; /// /// Set to true if we want all variables to be globally unique (because the assembler /// can't redefine them). /// private bool mDoUniquify; /// /// List of all non-variable symbols, for uniquification. This is generated from the /// project symbol table. When generating assembly sources, the labels are transformed /// through the label map. /// private Dictionary mAllNvSymbols; private Dictionary mLabelMap; /// /// Label uniquification helper. /// /// The BaseLabel does not change, but Label is updated by MakeUnique. /// /// /// LvLookup is run multiple times, and can be restarted in the middle of a run. It's /// essential that UniqueLabel behaves deterministically. For this to happen, the /// contents of SymbolTable can't change in a way that affects the outcome unless it /// also causes us to redo the uniquification. This mostly means that we have to be /// very careful about creating duplicate symbols, so that we don't get halfway through /// the analysis pass and invalidate our previous work. It's best to leave /// uniquification disabled until we're generating assembly source code. /// /// The issues also make it hard to do the uniquification once, rather than every time we /// walk the code. Not all symbol changes cause a re-analysis (e.g. renaming a user /// label does not), and we don't want to fill the symbol table with the uniquified /// names because it could block user labels that would otherwise be valid. /// private class UniqueLabel { public string BaseLabel { get; private set; } public string Label { get; private set; } private int Counter { get; set; } public UniqueLabel(string baseLabel) { Label = BaseLabel = baseLabel; Counter = 0; } /// /// Updates the Label to be unique. Call this when a symbol is defined or /// re-defined. /// /// Symbol table, for uniqueness check. public void MakeUnique(Dictionary allNvSymbols) { // The main symbol table might have user-supplied labels like "ptr_2", so we // need to keep testing against that. However, it should not be possible for // us to clash with other uniquified variables. So we don't need to check // for clashes in the UniqueLabel list. // // It *is* possible to clash with other variable base names, so we can't // exclude variables from our SymbolTable lookup. string testLabel; do { Counter++; testLabel = BaseLabel + "_" + Counter; } while (allNvSymbols.TryGetValue(testLabel, out Symbol unused)); Label = testLabel; } } private Dictionary mUniqueLabels; /// /// Duplicate label re-map. This is applied before uniquification. /// /// /// It's hard to do this as part of uniquification because the remapped base name ends /// up in the symbol table, and the uniquifier isn't able to tell that the entry in the /// symbol table is itself. The logic is simpler if we just rename the label before /// the uniquifier ever sees it. /// /// I feel like there has to be a simpler way to do this. This'll do for now. /// private Dictionary mDupRemap; /// /// Most recently processed offset. /// private int mRecentOffset; /// /// Symbols defined at mRecentOffset. /// private List mRecentSymbols; /// /// Cumulative symbols defined at the current offset. /// private LocalVariableTable mCurrentTable; // Next point of interest. private int mNextLvtIndex; private int mNextLvtOffset; /// /// Constructor. /// /// List of tables from the DisasmProject. /// Project reference. /// Label map dictionary, used to rename the labels in /// the project symbol table. May be null. /// If true, labels with leading underscores /// will be prefixed. /// Set to true if variable names cannot be redefined. public LocalVariableLookup(SortedList lvTables, DisasmProject project, Dictionary labelMap, bool maskLeadingUnderscores, bool uniquify) { mLvTables = lvTables; mProject = project; mLabelMap = labelMap; mMaskLeadingUnderscores = maskLeadingUnderscores; mDoUniquify = uniquify; mCurrentTable = new LocalVariableTable(); mDupRemap = new Dictionary(); if (uniquify) { mUniqueLabels = new Dictionary(); } Reset(); } public void Reset(bool rebuildSyms = true) { mRecentOffset = -1; mRecentSymbols = null; mCurrentTable.Clear(); mUniqueLabels?.Clear(); mDupRemap.Clear(); if (mLvTables.Count == 0) { mNextLvtIndex = -1; mNextLvtOffset = mProject.FileDataLength; } else { mNextLvtIndex = 0; mNextLvtOffset = mLvTables.Keys[0]; } CreateAllSymbolsDict(); } private void CreateAllSymbolsDict() { // TODO(someday): we don't need to regenerate the all-symbols list if the list // of symbols hasn't actually changed. Currently no way to tell. Dictionary labelMap = mLabelMap; SymbolTable symTab = mProject.SymbolTable; mAllNvSymbols = new Dictionary(symTab.Count); foreach (Symbol sym in symTab) { if (sym.SymbolSource == Symbol.Source.Variable) { continue; } if (labelMap != null && labelMap.TryGetValue(sym.Label, out string newLabel)) { // Non-unique labels may map multiple entries to a single entry. That's // fine; our goal here is just to avoid duplication. Besides, any symbols // being output as locals will have the local prefix character and won't // be a match. mAllNvSymbols[newLabel] = sym; } else { mAllNvSymbols[sym.Label] = sym; } } } /// /// Gets the symbol associated with the operand of an instruction. /// /// Offset of start of instruction. /// Operand value. /// Operand type. Should be ExternalAddress for DP ops, or /// Constant for StackRel ops. /// Symbol, or null if no match found. public DefSymbol GetSymbol(int offset, int operandValue, Symbol.Type type) { AdvanceToOffset(offset); return mCurrentTable.GetByValueRange(operandValue, 1, type); } /// /// Gets the symbol associated with a symbol reference. If uniquification is enabled, /// the unique-label map for the specified offset will be used to transform the /// symbol reference. /// /// Offset of start of instruction. /// Reference to symbol. /// Symbol, or null if no match found. public DefSymbol GetSymbol(int offset, WeakSymbolRef symRef) { AdvanceToOffset(offset); // The symRef uses the non-uniquified symbol, so we need to get the unique value at // the current offset. We may need to do this even when variables can be // redefined, because we might have a variable that's a duplicate of a user label // or project symbol. // Start by applying the de-duplication map. string label = symRef.Label; if (mDupRemap.TryGetValue(symRef.Label, out string remap)) { label = remap; } //Debug.WriteLine("GetSymbol " + symRef.Label + " -> " + label); if (mUniqueLabels != null && mUniqueLabels.TryGetValue(label, out UniqueLabel ulab)) { //Debug.WriteLine(" Unique var " + symRef.Label + " -> " + ulab.Label); label = ulab.Label; } DefSymbol defSym = mCurrentTable.GetByLabel(label); // In theory this is okay, but in practice the only things asking for symbols are // entirely convinced that the symbol exists here. So this is probably a bug. Debug.Assert(defSym != null); return defSym; } /// /// Identifies the LocalVariableTable that defined the symbol reference. /// /// Offset at which the symbol was referenced. /// Reference to symbol. /// Table index, or -1 if not found. public int GetDefiningTableOffset(int offset, WeakSymbolRef symRef) { // Get mDupRemap et. al. into the right state. AdvanceToOffset(offset); // symRef is the non-uniquified, de-duplicated symbol that was generated // during the analysis pass. We either need to un-de-duplicate the label, // or de-duplicate what we pull out of the Lv tables. The former requires // a linear string search but will be faster if there are a lot of tables. string label = UnDeDuplicate(symRef.Label); // Walk backward through the list of tables until we find a match. IList keys = mLvTables.Keys; for (int i = keys.Count - 1; i >= 0; i--) { if (keys[i] > offset) { // table comes after the point of reference continue; } if (mLvTables.Values[i].GetByLabel(label) != null) { // found it return keys[i]; } } // if we didn't find it, it doesn't exist... right? Debug.Assert(mCurrentTable.GetByLabel(label) == null); return -1; } private string UnDeDuplicate(string label) { foreach (KeyValuePair kvp in mDupRemap) { if (kvp.Value == label) { return kvp.Key; } } return label; } /// /// Restores a de-duplicated symbol to original form. /// /// /// Another kluge on the de-duplication system. This is used by the instruction /// operand editor's "edit variable" shortcut mechanism, because trying to edit the /// DefSymbol with the de-duplicated name ends badly. /// /// Symbol to un-de-duplicate. /// Original or un-de-duplicated symbol. public DefSymbol GetOriginalForm(DefSymbol sym) { string orig = UnDeDuplicate(sym.Label); if (orig == sym.Label) { return sym; } return new DefSymbol(sym, orig); } /// /// Gets a LocalVariableTable that is the result of merging all tables up to this point. /// /// Target offset. /// Combined table. public LocalVariableTable GetMergedTableAtOffset(int offset) { AdvanceToOffset(offset); return mCurrentTable; } /// /// Finds the closest table that is defined at or before the specified offset. Will /// attempt to only return un-hidden tables, but will return a hidden table if no /// others are available. /// /// Target offset. /// The table's definition offset, or -1 if no tables were defined before this /// point. public int GetNearestTableOffset(int offset) { int nearest = -1; int nearestUnhidden = -1; // Could do a smarter search, but I'm expecting the set to be small. foreach (KeyValuePair kvp in mLvTables) { if (kvp.Key > offset) { break; } nearest = kvp.Key; if (mProject.GetAnattrib(nearest).IsStart) { nearestUnhidden = nearest; } } if (nearestUnhidden >= 0) { return nearestUnhidden; } else { return nearest; } } /// /// Generates a list of variables defined at the specified offset, if a table is /// associated with that offset. /// /// File data offset. /// List of symbols, uniquified if desired, or null if no LocalVariableTable /// exists at the specified offset. public List GetVariablesDefinedAtOffset(int offset) { AdvanceToOffset(offset); if (mRecentOffset == offset) { return mRecentSymbols; } return null; } /// /// Updates internal state to reflect the state of the world at the specified offset. /// /// /// When the offset is greater than or equal to its value on a previous call, we can /// do an incremental update. If the offset moves backward, we have to reset and walk /// forward again. /// /// Target offset. private void AdvanceToOffset(int targetOffset) { if (mNextLvtIndex < 0) { return; } if (targetOffset < mRecentOffset) { // We went backwards. Reset(false); } while (mNextLvtOffset <= targetOffset) { if (!mProject.GetAnattrib(mNextLvtOffset).IsStart) { // Hidden table, ignore it. Debug.WriteLine("Ignoring LvTable at +" + mNextLvtOffset.ToString("x6")); } else { // Process this table. LocalVariableTable lvt = mLvTables.Values[mNextLvtIndex]; if (lvt.ClearPrevious) { mCurrentTable.Clear(); } // Create a list for GetVariablesDefinedAtOffset mRecentSymbols = new List(); mRecentOffset = mNextLvtOffset; // Merge the new entries into the work table. This automatically // discards entries that clash by name or value. for (int i = 0; i < lvt.Count; i++) { DefSymbol defSym = lvt[i]; string newLabel = defSym.Label; if (mMaskLeadingUnderscores && newLabel[0] == '_') { newLabel = AsmGen.LabelLocalizer.NO_UNDER_PFX + newLabel; } // Look for non-variable symbols with the same label. Ordinarily the // editor prevents this from happening, but there are ways to trick // the system (e.g. add a symbol while the LvTable is hidden, or have // a non-unique local promoted to global). We deal with it here. // // TODO(someday): this is not necessary for assemblers like Merlin 32 // that put variables in a separate namespace. if (mAllNvSymbols.TryGetValue(newLabel, out Symbol unused)) { Debug.WriteLine("Detected duplicate non-var label " + newLabel + " at +" + mNextLvtOffset.ToString("x6")); newLabel = GenerateDeDupLabel(newLabel); } if (newLabel != defSym.Label) { mDupRemap[defSym.Label] = newLabel; defSym = new DefSymbol(defSym, newLabel); } if (mDoUniquify) { if (mUniqueLabels.TryGetValue(defSym.Label, out UniqueLabel ulab)) { // We've seen this label before; generate a unique version by // increasing the appended number. ulab.MakeUnique(mAllNvSymbols); defSym = new DefSymbol(defSym, ulab.Label); } else { // Haven't seen this before. Add it to the unique-labels table. mUniqueLabels.Add(defSym.Label, new UniqueLabel(defSym.Label)); } } mCurrentTable.AddOrReplace(defSym); mRecentSymbols.Add(defSym); } //mCurrentTable.DebugDump(mNextLvtOffset); } // Update state to look for next table. mNextLvtIndex++; if (mNextLvtIndex < mLvTables.Keys.Count) { mNextLvtOffset = mLvTables.Keys[mNextLvtIndex]; } else { mNextLvtOffset = mProject.FileDataLength; // never reached } } } /// /// Generates a unique label for the duplicate remap table. /// /// /// We need to worry about clashes with the main symbol table, but we don't have to /// worry about other entries in the remap table because we know their baseLabels /// are different. /// private string GenerateDeDupLabel(string baseLabel) { string testLabel; int counter = 0; do { counter++; testLabel = baseLabel + "_DUP" + counter; } while (mAllNvSymbols.TryGetValue(testLabel, out Symbol unused)); return testLabel; } public static bool IsTableHidden(int offset, DisasmProject project) { return !project.GetAnattrib(offset).IsStart; } } }