diff --git a/Asm65/CpuDef.cs b/Asm65/CpuDef.cs index 907d8d1..fc8c8d3 100644 --- a/Asm65/CpuDef.cs +++ b/Asm65/CpuDef.cs @@ -227,7 +227,7 @@ namespace Asm65 { /// Returns an entry from the OpDef array for the specified opcode, 0-255. (We could /// probably just make this the class indexer.) /// - /// Instruction opcode + /// Instruction opcode number (0-255). /// Instruction definition. public OpDef GetOpDef(int op) { return mOpDefs[op]; } diff --git a/SourceGen/AsmGen/AsmAcme.cs b/SourceGen/AsmGen/AsmAcme.cs index d6f0f3a..173d0f6 100644 --- a/SourceGen/AsmGen/AsmAcme.cs +++ b/SourceGen/AsmGen/AsmAcme.cs @@ -222,6 +222,7 @@ namespace SourceGen.AsmGen { mLocalizer.LocalPrefix = "@"; mLocalizer.Analyze(); } + mLocalizer.FixOpcodeLabels(); // Use UTF-8 encoding, without a byte-order mark. using (StreamWriter sw = new StreamWriter(pathName, false, new UTF8Encoding(false))) { diff --git a/SourceGen/AsmGen/AsmCc65.cs b/SourceGen/AsmGen/AsmCc65.cs index b6ebc06..f9ab61c 100644 --- a/SourceGen/AsmGen/AsmCc65.cs +++ b/SourceGen/AsmGen/AsmCc65.cs @@ -220,6 +220,7 @@ namespace SourceGen.AsmGen { mLocalizer.QuirkVariablesEndScope = true; mLocalizer.Analyze(); } + mLocalizer.FixOpcodeLabels(); // Use UTF-8 encoding, without a byte-order mark. using (StreamWriter sw = new StreamWriter(cfgName, false, new UTF8Encoding(false))) { diff --git a/SourceGen/AsmGen/AsmMerlin32.cs b/SourceGen/AsmGen/AsmMerlin32.cs index d344076..e9807f3 100644 --- a/SourceGen/AsmGen/AsmMerlin32.cs +++ b/SourceGen/AsmGen/AsmMerlin32.cs @@ -191,6 +191,7 @@ namespace SourceGen.AsmGen { mLocalizer.LocalPrefix = ":"; mLocalizer.Analyze(); } + //mLocalizer.FixOpcodeLabels(); // Use UTF-8 encoding, without a byte-order mark. 
using (StreamWriter sw = new StreamWriter(pathName, false, new UTF8Encoding(false))) { diff --git a/SourceGen/AsmGen/AsmTass64.cs b/SourceGen/AsmGen/AsmTass64.cs index 186e50c..6bff8ff 100644 --- a/SourceGen/AsmGen/AsmTass64.cs +++ b/SourceGen/AsmGen/AsmTass64.cs @@ -231,6 +231,7 @@ namespace SourceGen.AsmGen { mLocalizer.Analyze(); } mLocalizer.MaskLeadingUnderscores(); + mLocalizer.FixOpcodeLabels(); // Use UTF-8 encoding, without a byte-order mark. using (StreamWriter sw = new StreamWriter(pathName, false, new UTF8Encoding(false))) { diff --git a/SourceGen/AsmGen/LabelLocalizer.cs b/SourceGen/AsmGen/LabelLocalizer.cs index a7e12af..d426411 100644 --- a/SourceGen/AsmGen/LabelLocalizer.cs +++ b/SourceGen/AsmGen/LabelLocalizer.cs @@ -332,11 +332,12 @@ namespace SourceGen.AsmGen { /// This is necessary for assemblers like 64tass that use a leading underscore to /// indicate that a label should be local. /// - /// This may be called even if label localization is disabled. In that case we just - /// create an empty label map and populate as needed. - /// /// Only call this if underscores are used to indicate local labels. /// + /// + /// This may be called even if label localization is disabled. In that case we just + /// create an empty label map and populate as needed. + /// public void MaskLeadingUnderscores() { bool allGlobal = false; if (LabelMap == null) { @@ -383,8 +384,9 @@ namespace SourceGen.AsmGen { // Make sure it's unique. string uniqueLabel = newLabel; - int uval = 1; + int uval = 0; while (allLabels.ContainsKey(uniqueLabel)) { + uval++; uniqueLabel = newLabel + uval.ToString(); } allLabels.Add(uniqueLabel, uniqueLabel); @@ -397,5 +399,91 @@ namespace SourceGen.AsmGen { Debug.WriteLine("UMAP: allcount=" + allLabels.Count + " mapcount=" + LabelMap.Count); } + + /// + /// Remaps labels that match opcode names. Updated names will be added to LabelMap. + /// This should be run after localization and underscore concealment have finished. 
+ /// + /// + /// Most assemblers don't like it if you create a label with the same name as an + /// opcode, e.g. "jmp LSR" doesn't work. We can use the label map to work around + /// the issue. + /// + /// Most assemblers regard mnemonics as case-insensitive, even if labels are + /// case-sensitive, so we want to remap both "lsr" and "LSR". + /// + /// This doesn't really have anything to do with label localization other than that + /// we're updating the label remap table. + /// + public void FixOpcodeLabels() { + if (LabelMap == null) { + LabelMap = new Dictionary(); + } + + // Create a searchable list of opcode names using the current CPU definition. + // (All tested assemblers that failed on opcode names only did so for names + // that were part of the current definition, e.g. "TSB" was accepted as a label + // when the CPU was set to 6502.) + Dictionary opnames = new Dictionary(); + Asm65.CpuDef cpuDef = mProject.CpuDef; + for (int i = 0; i < 256; i++) { + Asm65.OpDef op = cpuDef.GetOpDef(i); + // There may be multiple entries with the same name (e.g. "NOP"). That's fine. + opnames[op.Mnemonic.ToUpperInvariant()] = op; + } + + // Create a list of all labels, for uniqueness testing. If a label has been + // remapped, we add the remapped entry. + // (All tested assemblers that failed on opcode names only did so for names + // in their non-localized form. While "LSR" failed, "@LSR", "_LSR", ".LSR", etc. + // were accepted. So if it was remapped by the localizer, we don't need to + // worry about it.) + SortedList allLabels = new SortedList(); + for (int i = 0; i < mProject.FileDataLength; i++) { + Symbol sym = mProject.GetAnattrib(i).Symbol; + if (sym == null) { + continue; + } + LabelMap.TryGetValue(sym.Label, out string mapLabel); + if (mapLabel != null) { + allLabels.Add(mapLabel, mapLabel); + } else { + allLabels.Add(sym.Label, sym.Label); + } + } + + // Now run through the list of labels, looking for any that match opcode + // mnemonics. 
+ for (int i = 0; i < mProject.FileDataLength; i++) { + Symbol sym = mProject.GetAnattrib(i).Symbol; + if (sym == null) { + // No label at this offset. + continue; + } + string cmpLabel = sym.Label; + if (LabelMap.TryGetValue(sym.Label, out string mapLabel)) { + cmpLabel = mapLabel; + } + + if (opnames.ContainsKey(cmpLabel.ToUpperInvariant())) { + //Debug.WriteLine("Remapping label (op mnemonic): " + sym.Label); + + int uval = 0; + string uniqueLabel; + do { + uval++; + uniqueLabel = cmpLabel + "_" + uval.ToString(); + } while (allLabels.ContainsKey(uniqueLabel)); + + allLabels.Add(uniqueLabel, uniqueLabel); + LabelMap[sym.Label] = uniqueLabel; // indexer, not Add(): sym.Label is already a key when cmpLabel came from the map, and Add() would throw ArgumentException + } + } + + if (LabelMap.Count == 0) { + // didn't do anything, lose the table + LabelMap = null; + } + } } } diff --git a/SourceGen/SGTestData/2012-label-localizer b/SourceGen/SGTestData/2012-label-localizer index 64db0bd..7a2358e 100644 Binary files a/SourceGen/SGTestData/2012-label-localizer and b/SourceGen/SGTestData/2012-label-localizer differ diff --git a/SourceGen/SGTestData/2012-label-localizer.dis65 b/SourceGen/SGTestData/2012-label-localizer.dis65 index f297062..cb76cb9 100644 --- a/SourceGen/SGTestData/2012-label-localizer.dis65 +++ b/SourceGen/SGTestData/2012-label-localizer.dis65 @@ -1,8 +1,8 @@ ### 6502bench SourceGen dis65 v1.0 ### { -"_ContentVersion":1,"FileDataLength":87,"FileDataCrc32":255194945,"ProjectProps":{ -"CpuName":"65816","IncludeUndocumentedInstr":false,"EntryFlags":32702671,"AnalysisParams":{ -"AnalyzeUncategorizedData":true,"MinCharsForString":4,"SeekNearbyTargets":true}, +"_ContentVersion":2,"FileDataLength":103,"FileDataCrc32":1381810255,"ProjectProps":{ +"CpuName":"65C02","IncludeUndocumentedInstr":false,"EntryFlags":32702671,"AutoLabelStyle":"Simple","AnalysisParams":{ +"AnalyzeUncategorizedData":true,"DefaultTextScanMode":"LowHighAscii","MinCharsForString":4,"SeekNearbyTargets":true,"SmartPlpHandling":true}, 
"PlatformSymbolFileIdentifiers":[],"ExtensionScriptFileIdentifiers":[],"ProjectSyms":{ "__ENABLE_LABEL_LOCALIZATION":{ "DataDescriptor":{ @@ -21,7 +21,7 @@ "Low":0,"High":0,"Hint":"Code"}],"StatusFlagOverrides":{ }, "Comments":{ -}, +"92":"local","94":"global"}, "LongComments":{ }, "Notes":{ @@ -64,7 +64,25 @@ "14":{ "Label":"__nopped","Value":4110,"Source":"User","Type":"LocalOrGlobalAddr"}, "13":{ -"Label":"start","Value":4109,"Source":"User","Type":"LocalOrGlobalAddr"}}, +"Label":"start","Value":4109,"Source":"User","Type":"LocalOrGlobalAddr"}, +"86":{ +"Label":"_uname","Value":4182,"Source":"User","Type":"LocalOrGlobalAddr"}, +"87":{ +"Label":"X_uname11","Value":4183,"Source":"User","Type":"LocalOrGlobalAddr"}, +"88":{ +"Label":"X_uname1","Value":4184,"Source":"User","Type":"LocalOrGlobalAddr"}, +"92":{ +"Label":"AND","Value":4188,"Source":"User","Type":"LocalOrGlobalAddr"}, +"94":{ +"Label":"JMP","Value":4190,"Source":"User","Type":"GlobalAddr"}, +"96":{ +"Label":"jmp","Value":4192,"Source":"User","Type":"GlobalAddr"}, +"98":{ +"Label":"TSB","Value":4194,"Source":"User","Type":"GlobalAddr"}, +"89":{ +"Label":"X_uname","Value":4185,"Source":"User","Type":"LocalOrGlobalAddr"}, +"100":{ +"Label":"XCE","Value":4196,"Source":"User","Type":"GlobalAddr"}}, "OperandFormats":{ "23":{ "Length":3,"Format":"NumericLE","SubFormat":"Symbol","SymbolRef":{ @@ -76,4 +94,6 @@ "Length":3,"Format":"NumericLE","SubFormat":"Symbol","SymbolRef":{ "Label":"_reach4","Part":"Low"}}, "58":{ -"Length":3,"Format":"NumericLE","SubFormat":"Hex","SymbolRef":null}}} +"Length":3,"Format":"NumericLE","SubFormat":"Hex","SymbolRef":null}}, +"LvTables":{ +}} diff --git a/SourceGen/SGTestData/Expected/2012-label-localizer_64tass.S b/SourceGen/SGTestData/Expected/2012-label-localizer_64tass.S index 169d0b6..da0408d 100644 --- a/SourceGen/SGTestData/Expected/2012-label-localizer_64tass.S +++ b/SourceGen/SGTestData/Expected/2012-label-localizer_64tass.S @@ -1,9 +1,7 @@ - .cpu "65816" + .cpu "65c02" 
REALLYLONGLABELNAME = $8888 ;that's a long name * = $1000 - .as - .xs nop X_start lda _start lda X_start1 @@ -56,5 +54,19 @@ globlat jsr nglobal end nop EXCESSIVELY_LONG_LABEL lda REALLYLONGLABELNAME +_X_uname + nop +_X_uname11 + nop +_X_uname1 + nop +_X_uname2 + nop + lda #$00 +_AND bne _AND ;local +JMP_1 bne JMP_1 ;global +jmp_1 bne jmp_1 +TSB_1 bne TSB_1 +XCE bne XCE rts diff --git a/SourceGen/SGTestData/Expected/2012-label-localizer_Merlin32.S b/SourceGen/SGTestData/Expected/2012-label-localizer_Merlin32.S index 8db753d..56cf38d 100644 --- a/SourceGen/SGTestData/Expected/2012-label-localizer_Merlin32.S +++ b/SourceGen/SGTestData/Expected/2012-label-localizer_Merlin32.S @@ -49,5 +49,17 @@ globlat jsr nglobal end nop EXCESSIVELY_LONG_LABEL lda REALLYLONGLABELNAME +:_uname nop +:X_uname11 + nop +:X_uname1 + nop +:X_uname nop + lda #$00 +:AND bne :AND ;local +JMP bne JMP ;global +jmp bne jmp +TSB bne TSB +XCE bne XCE rts diff --git a/SourceGen/SGTestData/Expected/2012-label-localizer_acme.S b/SourceGen/SGTestData/Expected/2012-label-localizer_acme.S index a7d7845..26b8546 100644 --- a/SourceGen/SGTestData/Expected/2012-label-localizer_acme.S +++ b/SourceGen/SGTestData/Expected/2012-label-localizer_acme.S @@ -1,9 +1,7 @@ - !cpu 65816 + !cpu 65c02 REALLYLONGLABELNAME = $8888 ;that's a long name * = $1000 - !as - !rs nop _start lda @start lda X_start @@ -55,5 +53,18 @@ globlat jsr nglobal end nop EXCESSIVELY_LONG_LABEL lda REALLYLONGLABELNAME +@_uname nop +@X_uname11 + nop +@X_uname1 + nop +@X_uname + nop + lda #$00 +@AND bne @AND ;local +JMP_1 bne JMP_1 ;global +jmp_1 bne jmp_1 +TSB_1 bne TSB_1 +XCE bne XCE rts diff --git a/SourceGen/SGTestData/Expected/2012-label-localizer_cc65.S b/SourceGen/SGTestData/Expected/2012-label-localizer_cc65.S index 42ec176..a01ca1e 100644 --- a/SourceGen/SGTestData/Expected/2012-label-localizer_cc65.S +++ b/SourceGen/SGTestData/Expected/2012-label-localizer_cc65.S @@ -1,10 +1,8 @@ - .setcpu "65816" + .setcpu "65C02" REALLYLONGLABELNAME 
= $8888 ;that's a long name ; .segment "SEG000" .org $1000 - .a8 - .i8 nop _start: lda @start lda X_start @@ -56,5 +54,18 @@ globlat: jsr nglobal end: nop EXCESSIVELY_LONG_LABEL: lda REALLYLONGLABELNAME +@_uname: nop +@X_uname11: + nop +@X_uname1: + nop +@X_uname: + nop + lda #$00 +@AND: bne @AND ;local +JMP_1: bne JMP_1 ;global +jmp_1: bne jmp_1 +TSB_1: bne TSB_1 +XCE: bne XCE rts diff --git a/SourceGen/SGTestData/Expected/2012-label-localizer_cc65.cfg b/SourceGen/SGTestData/Expected/2012-label-localizer_cc65.cfg index bc92852..b86d5ff 100644 --- a/SourceGen/SGTestData/Expected/2012-label-localizer_cc65.cfg +++ b/SourceGen/SGTestData/Expected/2012-label-localizer_cc65.cfg @@ -1,7 +1,7 @@ # 6502bench SourceGen generated linker script for 2012-label-localizer MEMORY { MAIN: file=%O, start=%S, size=65536; -# MEM000: file=%O, start=$1000, size=87; +# MEM000: file=%O, start=$1000, size=103; } SEGMENTS { CODE: load=MAIN, type=rw; diff --git a/SourceGen/SGTestData/Source/2012-label-localizer.S b/SourceGen/SGTestData/Source/2012-label-localizer.S index 5e8c73b..c8d1ec7 100644 --- a/SourceGen/SGTestData/Source/2012-label-localizer.S +++ b/SourceGen/SGTestData/Source/2012-label-localizer.S @@ -3,6 +3,8 @@ ; ; Assembler: Merlin 32 +; NOTE: select CPU=65C02 + EXTSYM equ $8888 ;EDIT: give this a long label org $1000 @@ -62,4 +64,20 @@ globlat jsr nglobal ;EDIT: mark label as global bra cont cont nop ;EDIT: mark label as global lda EXTSYM + +; test localizer uniquification +; only relevant for 64tass, which uses a leading '_' to indicate local labels +_uname nop +X_uname11 nop +X_uname1 nop +X_uname nop + +; test labels with opcode names (not really a localization thing) +; EDIT: set these names, mark as global to prevent localization + lda #$00 +ANDl bne ANDl ;leave label local +JMPg bne JMPg ;make label global +jmpg bne jmpg ;lower case name +TSBg bne TSBg +XCEg bne XCEg ;should be allowed rts