From b3872986855ffbd5594e68416c32f28f04b15fe0 Mon Sep 17 00:00:00 2001 From: Andy McFadden Date: Mon, 13 Jan 2020 17:54:47 -0800 Subject: [PATCH] Fix various local variable de-duplication bugs In 1.5.0-dev1, as part of changes to the way label localization works, the local variable de-duplicator started checking against a filtered copy of the symbol table. Unfortunately it never re-generated the table, so a long-lived LocalVariableLookup (like the one used by LineListGen) would set up the dup map wrong and be inconsistent with other parts of the program. We now regenerate the table on every Reset(). The de-duplication stuff also had problems when opcodes and operands were double-clicked on. When the opcode is clicked, the selection should jump to the appropriate variable declaration, but it wasn't being found because the label generated in the list was in its original form. Fixed. When an instruction operand is double-clicked, the instruction operand editor opens with an "edit variable" shortcut. This was showing the de-duplicated name, which isn't necessarily a bad thing, but it was passing that value on to the DefSymbol editor, which thought it was being asked to create a new entry. Fixed. (Entering the editor through the LvTable editor works correctly, with nary a de-duplicated name in sight. You'll be forced to rename it because it'll fail the uniqueness test.) References to de-duplicated local variables were getting lost when the symbol's label was replaced (due largely to a convenient but flawed shortcut: xrefs are attached to DefSymbol objects). Fixed by linking the XrefSets. Given the many issues and their relative subtlety, I decided to make the modified names more obvious, and went back to the "_DUPn" naming strategy. (I'm also considering just making it an error and discarding conflicting entries during analysis... this is much more complicated than I expected it to be.) Quick tests can be performed in 2019-local-variables: - go to +000026, double-click on the opcode, confirm sel change - go to +000026, double-click on the operand, confirm orig name shown in shortcut and that shortcut opens editor with orig name - go to +00001a, down a line, click on PROJ_ZERO_DUP1 and confirm that it has a single reference (from +000026) - double-click on var table and confirm editing entry --- SourceGen/DefSymbol.cs | 9 +- SourceGen/LineListGen.cs | 19 +-- SourceGen/LocalVariableLookup.cs | 116 +++++++++++++----- SourceGen/LocalVariableTable.cs | 4 + .../Expected/2019-local-variables_64tass.S | 6 +- .../Expected/2019-local-variables_Merlin32.S | 6 +- .../Expected/2019-local-variables_acme.S | 10 +- .../Expected/2019-local-variables_cc65.S | 6 +- .../Expected/2023-non-unique-labels_64tass.S | 14 +-- .../2023-non-unique-labels_Merlin32.S | 6 +- .../Expected/2023-non-unique-labels_acme.S | 8 +- .../Expected/2023-non-unique-labels_cc65.S | 6 +- .../WpfGui/EditInstructionOperand.xaml.cs | 5 +- SourceGen/XrefSet.cs | 4 + 14 files changed, 149 insertions(+), 70 deletions(-) diff --git a/SourceGen/DefSymbol.cs b/SourceGen/DefSymbol.cs index 31a427d..4e2124e 100644 --- a/SourceGen/DefSymbol.cs +++ b/SourceGen/DefSymbol.cs @@ -294,6 +294,10 @@ namespace SourceGen { /// /// This can't be a simple Rename() function that uses a copy constructor because /// the label is in the base class. + /// + /// The Xrefs reference points to the actual XrefSet in the original. This is not + /// ideal, but it's the easiest way to keep xrefs working across Lv de-duplication + /// (you actually *want* xrefs added to copies to be held by the original). /// /// Source DefSymbol. /// Label to use. @@ -302,7 +306,10 @@ namespace SourceGen { defSym.LabelAnno, defSym.DataDescriptor.FormatSubType, defSym.DataDescriptor.Length, defSym.HasWidth, defSym.Comment, defSym.Direction, defSym.MultiMask, defSym.Tag) - { } + { + Debug.Assert(SymbolSource == Source.Variable); + Xrefs = defSym.Xrefs; + } /// /// Determines whether a symbol overlaps with a region. Useful for variables. diff --git a/SourceGen/LineListGen.cs b/SourceGen/LineListGen.cs index 0f89746..ee295ec 100644 --- a/SourceGen/LineListGen.cs +++ b/SourceGen/LineListGen.cs @@ -1397,13 +1397,13 @@ namespace SourceGen { return FormattedParts.CreateLongComment("BAD INDEX +" + offset.ToString("x6") + " sub=" + subLineIndex); } else { - // We use the entry directly from the LvTable, without LvLookup processing. - // This is good, because the entries in the display list match what the editor - // shows, but not quite right, because labels that are duplicates of - // non-variables (which ideally wouldn't happen) won't show their de-duplicated - // form. I think this is fine. - DefSymbol defSym = lvt[subLineIndex]; - // Use an operand length of 1 so things are shown as concisely as possible. + // Getting the symbol directly from the LvTable yields the original form, + // but we want the de-dup form. + //DefSymbol defSym = lvt[subLineIndex]; + List lvars = mLvLookup.GetVariablesDefinedAtOffset(offset); + DefSymbol defSym = lvars[subLineIndex]; + + // Use an operand length of 1 so the value is shown as concisely as possible. string addrStr = PseudoOp.FormatNumericOperand(mFormatter, mProject.SymbolTable, null, defSym.DataDescriptor, defSym.Value, 1, PseudoOp.FormatNumericOpFlags.None); @@ -1433,7 +1433,10 @@ namespace SourceGen { return null; } - return lvt[tableIndex]; + // Go through LvLookup to get de-dup form of labels. + //return lvt[tableIndex]; + List lvars = mLvLookup.GetVariablesDefinedAtOffset(offset); + return lvars[tableIndex]; } private FormattedParts[] GenerateStringLines(int offset, string popcode, diff --git a/SourceGen/LocalVariableLookup.cs b/SourceGen/LocalVariableLookup.cs index 8720490..224d714 100644 --- a/SourceGen/LocalVariableLookup.cs +++ b/SourceGen/LocalVariableLookup.cs @@ -30,6 +30,22 @@ namespace SourceGen { /// The symbol table is latched when the object is constructed. /// (2) If the assembler doesn't define a way to re-use variable names, we make them /// globally unique. [currently not needed] + /// + /// De-duplication changes the label in *most* circumstances. We want to show the de-dup + /// form everywhere except for the LvTable editor and the Lv editor. Using the correct + /// string at the correct time is necessary for some basic editor operations: + /// - Double-click on the opcode of LDA [lvar]. The selection should jump to the specific + /// entry in the LvTable. + /// - Double-click on the operand of LDA [lvar]. The instruction operand editor should + /// open and offer to edit the de-dup form. Clicking on the shortcut button should open + /// the Lv editor, with the original label shown (and perhaps a warning about + /// non-uniqueness). + /// - Double-click on the LvTable. The table listing should show the original label. + /// - Click on a de-duped entry and verify that the correct cross-references exist. + /// + /// To reduce confusion, the fact that something has been de-duped should be obvious. + /// + /// A few quick clicks in the 2019-local-variables test should confirm these. /// public class LocalVariableLookup { /// @@ -61,6 +77,8 @@ namespace SourceGen { /// private Dictionary mAllNvSymbols; + private Dictionary mLabelMap; + /// /// Label uniquification helper. /// @@ -145,7 +163,6 @@ namespace SourceGen { private int mNextLvtIndex; private int mNextLvtOffset; - /// /// Constructor. /// @@ -161,6 +178,7 @@ namespace SourceGen { bool maskLeadingUnderscores, bool uniquify) { mLvTables = lvTables; mProject = project; + mLabelMap = labelMap; mMaskLeadingUnderscores = maskLeadingUnderscores; mDoUniquify = uniquify; @@ -169,11 +187,31 @@ namespace SourceGen { if (uniquify) { mUniqueLabels = new Dictionary(); } - CreateAllSymbolsDict(labelMap); Reset(); } - private void CreateAllSymbolsDict(Dictionary labelMap) { + public void Reset(bool rebuildSyms = true) { + mRecentOffset = -1; + mRecentSymbols = null; + mCurrentTable.Clear(); + mUniqueLabels?.Clear(); + mDupRemap.Clear(); + if (mLvTables.Count == 0) { + mNextLvtIndex = -1; + mNextLvtOffset = mProject.FileDataLength; + } else { + mNextLvtIndex = 0; + mNextLvtOffset = mLvTables.Keys[0]; + } + CreateAllSymbolsDict(); + } + + private void CreateAllSymbolsDict() { + // TODO(someday): we don't need to regenerate the all-symbols list if the list + // of symbols hasn't actually changed. Currently no way to tell. + + Dictionary labelMap = mLabelMap; + SymbolTable symTab = mProject.SymbolTable; mAllNvSymbols = new Dictionary(symTab.Count); foreach (Symbol sym in symTab) { @@ -192,21 +230,6 @@ namespace SourceGen { } } - public void Reset() { - mRecentOffset = -1; - mRecentSymbols = null; - mCurrentTable.Clear(); - mUniqueLabels?.Clear(); - mDupRemap.Clear(); - if (mLvTables.Count == 0) { - mNextLvtIndex = -1; - mNextLvtOffset = mProject.FileDataLength; - } else { - mNextLvtIndex = 0; - mNextLvtOffset = mLvTables.Keys[0]; - } - } - /// /// Gets the symbol associated with the operand of an instruction. /// @@ -262,10 +285,15 @@ namespace SourceGen { /// Reference to symbol. /// Table index, or -1 if not found. public int GetDefiningTableOffset(int offset, WeakSymbolRef symRef) { - // symRef is the non-uniquified, non-de-duplicated symbol that was generated - // during the analysis pass. This matches the contents of the tables, so we don't - // need to transform it at all. - // + // Get mDupRemap et. al. into the right state. + AdvanceToOffset(offset); + + // symRef is the non-uniquified, de-duplicated symbol that was generated + // during the analysis pass. We either need to un-de-duplicate the label, + // or de-duplicate what we pull out of the Lv tables. The former requires + // a linear string search but will be faster if there are a lot of tables. + string label = UnDeDuplicate(symRef.Label); + // Walk backward through the list of tables until we find a match. IList keys = mLvTables.Keys; for (int i = keys.Count - 1; i >= 0; i--) { @@ -274,17 +302,44 @@ namespace SourceGen { continue; } - if (mLvTables.Values[i].GetByLabel(symRef.Label) != null) { + if (mLvTables.Values[i].GetByLabel(label) != null) { // found it return keys[i]; } } // if we didn't find it, it doesn't exist... right? - Debug.Assert(mCurrentTable.GetByLabel(symRef.Label) == null); + Debug.Assert(mCurrentTable.GetByLabel(label) == null); return -1; } + private string UnDeDuplicate(string label) { + foreach (KeyValuePair kvp in mDupRemap) { + if (kvp.Value == label) { + return kvp.Key; + } + } + return label; + } + + /// + /// Restores a de-duplicated symbol to original form. + /// + /// + /// Another kluge on the de-duplication system. This is used by the instruction + /// operand editor's "edit variable" shortcut mechanism, because trying to edit the + /// DefSymbol with the de-duplicated name ends badly. + /// + /// Symbol to un-de-duplicate. + /// Original or un-de-duplicated symbol. + public DefSymbol GetOriginalForm(DefSymbol sym) { + string orig = UnDeDuplicate(sym.Label); + if (orig == sym.Label) { + return sym; + } + return new DefSymbol(sym, orig); + } + /// /// Gets a LocalVariableTable that is the result of merging all tables up to this point. /// @@ -355,7 +410,7 @@ namespace SourceGen { } if (targetOffset < mRecentOffset) { // We went backwards. - Reset(); + Reset(false); } while (mNextLvtOffset <= targetOffset) { if (!mProject.GetAnattrib(mNextLvtOffset).IsStart) { @@ -392,7 +447,7 @@ namespace SourceGen { if (mAllNvSymbols.TryGetValue(newLabel, out Symbol unused)) { Debug.WriteLine("Detected duplicate non-var label " + newLabel + " at +" + mNextLvtOffset.ToString("x6")); - newLabel = DeDupLabel(newLabel); + newLabel = GenerateDeDupLabel(newLabel); } if (newLabel != defSym.Label) { @@ -432,12 +487,17 @@ namespace SourceGen { /// /// Generates a unique label for the duplicate remap table. /// - private string DeDupLabel(string baseLabel) { + /// + /// We need to worry about clashes with the main symbol table, but we don't have to + /// worry about other entries in the remap table because we know their baseLabels + /// are different. + /// + private string GenerateDeDupLabel(string baseLabel) { string testLabel; int counter = 0; do { counter++; - testLabel = baseLabel + "_" + counter; + testLabel = baseLabel + "_DUP" + counter; } while (mAllNvSymbols.TryGetValue(testLabel, out Symbol unused)); return testLabel; } diff --git a/SourceGen/LocalVariableTable.cs b/SourceGen/LocalVariableTable.cs index 7ca1510..e74bd68 100644 --- a/SourceGen/LocalVariableTable.cs +++ b/SourceGen/LocalVariableTable.cs @@ -62,6 +62,10 @@ namespace SourceGen { /// /// List of variables, sorted by label. /// + /// + /// This is the original form of the label. De-duplication and uniquification have + /// not been applied. + /// private SortedList mVarByLabel; /// diff --git a/SourceGen/SGTestData/Expected/2019-local-variables_64tass.S b/SourceGen/SGTestData/Expected/2019-local-variables_64tass.S index d24eb92..8f593fc 100644 --- a/SourceGen/SGTestData/Expected/2019-local-variables_64tass.S +++ b/SourceGen/SGTestData/Expected/2019-local-variables_64tass.S @@ -28,14 +28,14 @@ CONST_ZERO_VAR .var $f0 sta $f1,s eor 0 ora 240,s -PROJ_ZERO_1 .var $10 ;clash with project symbol -DPCODE_1 .var $80 ;clash with user label +PROJ_ZERO_DUP1 .var $10 ;clash with project symbol +DPCODE_DUP1 .var $80 ;clash with user label lda VAR_ZERO lda VAR_ZERO+1 lda VAR_TWO lda VAR_THREE lda $04 - lda PROJ_ZERO_1 + lda PROJ_ZERO_DUP1 lda $11 lda DPCODE ldx PROJ_ZERO diff --git a/SourceGen/SGTestData/Expected/2019-local-variables_Merlin32.S b/SourceGen/SGTestData/Expected/2019-local-variables_Merlin32.S index 2b38c45..ba48085 100644 --- a/SourceGen/SGTestData/Expected/2019-local-variables_Merlin32.S +++ b/SourceGen/SGTestData/Expected/2019-local-variables_Merlin32.S @@ -23,14 +23,14 @@ PROJ_ONE equ $01 ;project addr sta $f1,S eor 0 ora 240,S -]PROJ_ZERO_1 equ $10 ;clash with project symbol -]DPCODE_1 equ $80 ;clash with user label +]PROJ_ZERO_DUP1 equ $10 ;clash with project symbol +]DPCODE_DUP1 equ $80 ;clash with user label lda ]VAR_ZERO lda ]VAR_ZERO+1 lda ]VAR_TWO lda ]VAR_THREE lda $04 - lda ]PROJ_ZERO_1 + lda ]PROJ_ZERO_DUP1 lda $11 lda DPCODE ldx PROJ_ZERO diff --git a/SourceGen/SGTestData/Expected/2019-local-variables_acme.S b/SourceGen/SGTestData/Expected/2019-local-variables_acme.S index 9685340..f755693 100644 --- a/SourceGen/SGTestData/Expected/2019-local-variables_acme.S +++ b/SourceGen/SGTestData/Expected/2019-local-variables_acme.S @@ -31,8 +31,8 @@ PROJ_ONE = $01 ;project addr .VAR_ZERO = $00 .VAR_TWO = $02 .VAR_THREE = $03 -.PROJ_ZERO_1 = $10 ;clash with project symbol -.DPCODE_1 = $80 ;clash with user label +.PROJ_ZERO_DUP1 = $10 ;clash with project symbol +.DPCODE_DUP1 = $80 ;clash with user label .CONST_ZERO_VAR = $f0 lda .VAR_ZERO lda .VAR_ZERO+1 @@ -41,12 +41,12 @@ PROJ_ONE = $01 ;project addr .VAR_ZERO = $00 .VAR_TWO = $02 .VAR_THREE = $03 -.PROJ_ZERO_1 = $10 ;clash with project symbol -.DPCODE_1 = $80 ;clash with user label +.PROJ_ZERO_DUP1 = $10 ;clash with project symbol +.DPCODE_DUP1 = $80 ;clash with user label .CONST_ZERO_VAR = $f0 lda .VAR_THREE lda $04 - lda .PROJ_ZERO_1 + lda .PROJ_ZERO_DUP1 lda $11 lda+1 DPCODE !zone Z00002c diff --git a/SourceGen/SGTestData/Expected/2019-local-variables_cc65.S b/SourceGen/SGTestData/Expected/2019-local-variables_cc65.S index 73b8ba8..157f2fe 100644 --- a/SourceGen/SGTestData/Expected/2019-local-variables_cc65.S +++ b/SourceGen/SGTestData/Expected/2019-local-variables_cc65.S @@ -27,14 +27,14 @@ CONST_ZERO_VAR .set $f0 sta $f1,S eor 0 ora 240,S -PROJ_ZERO_1 .set $10 ;clash with project symbol -DPCODE_1 .set $80 ;clash with user label +PROJ_ZERO_DUP1 .set $10 ;clash with project symbol +DPCODE_DUP1 .set $80 ;clash with user label lda VAR_ZERO lda VAR_ZERO+1 lda VAR_TWO lda VAR_THREE lda $04 - lda PROJ_ZERO_1 + lda PROJ_ZERO_DUP1 lda $11 lda z:DPCODE ldx PROJ_ZERO diff --git a/SourceGen/SGTestData/Expected/2023-non-unique-labels_64tass.S b/SourceGen/SGTestData/Expected/2023-non-unique-labels_64tass.S index 1f80463..02b68e3 100644 --- a/SourceGen/SGTestData/Expected/2023-non-unique-labels_64tass.S +++ b/SourceGen/SGTestData/Expected/2023-non-unique-labels_64tass.S @@ -92,11 +92,11 @@ LDAL .byte $af .byte $10 .byte $00 nop -plain_1 .var $11 -X_under1_1 .var $12 +plain_DUP1 .var $11 +X_under1_DUP1 .var $12 X__dub1 .var $13 - lda plain_1 - lda X_under1_1 + lda plain_DUP1 + lda X_under1_DUP1 lda X__dub1 _plain lda _plain plain lda plain @@ -104,8 +104,8 @@ global8 dex bne plain X_under1 lda X_under1 _X__dub1 lda _X__dub1 -X_under1_1 .var $22 - lda plain_1 - lda X_under1_1 +X_under1_DUP1 .var $22 + lda plain_DUP1 + lda X_under1_DUP1 rts diff --git a/SourceGen/SGTestData/Expected/2023-non-unique-labels_Merlin32.S b/SourceGen/SGTestData/Expected/2023-non-unique-labels_Merlin32.S index 9f01dc4..bd11fea 100644 --- a/SourceGen/SGTestData/Expected/2023-non-unique-labels_Merlin32.S +++ b/SourceGen/SGTestData/Expected/2023-non-unique-labels_Merlin32.S @@ -91,10 +91,10 @@ LDAL dfb $af dfb $10 dfb $00 nop -]plain_1 equ $11 +]plain_DUP1 equ $11 ]_under1 equ $12 ]__dub1 equ $13 - lda ]plain_1 + lda ]plain_DUP1 lda ]_under1 lda ]__dub1 :plain lda :plain @@ -104,7 +104,7 @@ global8 dex X_under1 lda X_under1 :X__dub1 lda :X__dub1 ]_under1 equ $22 - lda ]plain_1 + lda ]plain_DUP1 lda ]_under1 rts diff --git a/SourceGen/SGTestData/Expected/2023-non-unique-labels_acme.S b/SourceGen/SGTestData/Expected/2023-non-unique-labels_acme.S index 3e60c40..38b8613 100644 --- a/SourceGen/SGTestData/Expected/2023-non-unique-labels_acme.S +++ b/SourceGen/SGTestData/Expected/2023-non-unique-labels_acme.S @@ -93,10 +93,10 @@ LDAL !byte $af !byte $00 nop !zone Z00009a -.plain_1 = $11 +.plain_DUP1 = $11 ._under1 = $12 .__dub1 = $13 - lda .plain_1 + lda .plain_DUP1 lda ._under1 lda .__dub1 @plain lda @plain @@ -106,10 +106,10 @@ global8 dex X_under1 lda X_under1 @X__dub1 lda @X__dub1 !zone Z0000af -.plain_1 = $11 +.plain_DUP1 = $11 .__dub1 = $13 ._under1 = $22 - lda .plain_1 + lda .plain_DUP1 lda ._under1 rts diff --git a/SourceGen/SGTestData/Expected/2023-non-unique-labels_cc65.S b/SourceGen/SGTestData/Expected/2023-non-unique-labels_cc65.S index deae6a7..577d395 100644 --- a/SourceGen/SGTestData/Expected/2023-non-unique-labels_cc65.S +++ b/SourceGen/SGTestData/Expected/2023-non-unique-labels_cc65.S @@ -93,10 +93,10 @@ LDAL: .byte $af .byte $10 .byte $00 nop -plain_1 .set $11 +plain_DUP1 .set $11 _under1 .set $12 __dub1 .set $13 - lda plain_1 + lda plain_DUP1 lda _under1 lda __dub1 @plain: lda @plain @@ -106,7 +106,7 @@ global8: dex X_under1: lda X_under1 @X__dub1: lda @X__dub1 _under1 .set $22 - lda plain_1 + lda plain_DUP1 lda _under1 rts diff --git a/SourceGen/WpfGui/EditInstructionOperand.xaml.cs b/SourceGen/WpfGui/EditInstructionOperand.xaml.cs index 4be05d1..3daee19 100644 --- a/SourceGen/WpfGui/EditInstructionOperand.xaml.cs +++ b/SourceGen/WpfGui/EditInstructionOperand.xaml.cs @@ -1151,13 +1151,14 @@ namespace SourceGen.WpfGui { } else { // Found a table. Do we have a matching symbol? ShowLvCreateEditButton = true; - mEditedLocalVar = lvLookup.GetSymbol(mOffset, mOperandValue, + DefSymbol existingVar = lvLookup.GetSymbol(mOffset, mOperandValue, mOpDef.IsDirectPageInstruction ? Symbol.Type.ExternalAddr : Symbol.Type.Constant); - if (mEditedLocalVar == null) { + if (existingVar == null) { ShowLvNoMatchFound = true; CreateEditLocalVariableText = CREATE_LOCAL_VARIABLE; } else { + mEditedLocalVar = lvLookup.GetOriginalForm(existingVar); ShowLvMatchFound = true; CreateEditLocalVariableText = EDIT_LOCAL_VARIABLE; LocalVariableLabel = mEditedLocalVar.Label; diff --git a/SourceGen/XrefSet.cs b/SourceGen/XrefSet.cs index 4f837a6..5bcf84b 100644 --- a/SourceGen/XrefSet.cs +++ b/SourceGen/XrefSet.cs @@ -141,5 +141,9 @@ namespace SourceGen { IEnumerator IEnumerable.GetEnumerator() { return ((IEnumerable)mRefs).GetEnumerator(); } + + public override string ToString() { + return "[XrefSet count=" + Count + "]"; + } } }