1
0
mirror of https://github.com/fadden/6502bench.git synced 2025-01-19 08:29:48 +00:00

Remap labels that use opcode mnemonics

In a recent survey, three out of four cross assemblers surveyed
recommended not using opcode mnemonics to their patients who use
labels.  We now remap labels like "AND" and "jmp", using the label
map that's part of the label localizer.

We skip the step for Merlin 32, which is perfectly happy to assemble
"JMP JMP JMP".

Also, fixed a bug in MaskLeadingUnderscores that could hang the
source generator thread.
This commit is contained in:
Andy McFadden 2019-09-20 15:29:34 -07:00
parent b74630dd5b
commit 824add17e8
14 changed files with 197 additions and 21 deletions

View File

@ -227,7 +227,7 @@ namespace Asm65 {
/// Returns an entry from the OpDef array for the specified opcode, 0-255. (We could /// Returns an entry from the OpDef array for the specified opcode, 0-255. (We could
/// probably just make this the class indexer.) /// probably just make this the class indexer.)
/// </summary> /// </summary>
/// <param name="op">Instruction opcode</param> /// <param name="op">Instruction opcode number (0-255).</param>
/// <returns>Instruction definition.</returns> /// <returns>Instruction definition.</returns>
public OpDef GetOpDef(int op) { return mOpDefs[op]; } public OpDef GetOpDef(int op) { return mOpDefs[op]; }

View File

@ -222,6 +222,7 @@ namespace SourceGen.AsmGen {
mLocalizer.LocalPrefix = "@"; mLocalizer.LocalPrefix = "@";
mLocalizer.Analyze(); mLocalizer.Analyze();
} }
mLocalizer.FixOpcodeLabels();
// Use UTF-8 encoding, without a byte-order mark. // Use UTF-8 encoding, without a byte-order mark.
using (StreamWriter sw = new StreamWriter(pathName, false, new UTF8Encoding(false))) { using (StreamWriter sw = new StreamWriter(pathName, false, new UTF8Encoding(false))) {

View File

@ -220,6 +220,7 @@ namespace SourceGen.AsmGen {
mLocalizer.QuirkVariablesEndScope = true; mLocalizer.QuirkVariablesEndScope = true;
mLocalizer.Analyze(); mLocalizer.Analyze();
} }
mLocalizer.FixOpcodeLabels();
// Use UTF-8 encoding, without a byte-order mark. // Use UTF-8 encoding, without a byte-order mark.
using (StreamWriter sw = new StreamWriter(cfgName, false, new UTF8Encoding(false))) { using (StreamWriter sw = new StreamWriter(cfgName, false, new UTF8Encoding(false))) {

View File

@ -191,6 +191,7 @@ namespace SourceGen.AsmGen {
mLocalizer.LocalPrefix = ":"; mLocalizer.LocalPrefix = ":";
mLocalizer.Analyze(); mLocalizer.Analyze();
} }
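// Merlin 32 accepts labels that match opcode mnemonics (it will happily assemble
// "JMP JMP JMP"), so the opcode-label remap step is intentionally skipped here.
//mLocalizer.FixOpcodeLabels();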
// Use UTF-8 encoding, without a byte-order mark. // Use UTF-8 encoding, without a byte-order mark.
using (StreamWriter sw = new StreamWriter(pathName, false, new UTF8Encoding(false))) { using (StreamWriter sw = new StreamWriter(pathName, false, new UTF8Encoding(false))) {

View File

@ -231,6 +231,7 @@ namespace SourceGen.AsmGen {
mLocalizer.Analyze(); mLocalizer.Analyze();
} }
mLocalizer.MaskLeadingUnderscores(); mLocalizer.MaskLeadingUnderscores();
mLocalizer.FixOpcodeLabels();
// Use UTF-8 encoding, without a byte-order mark. // Use UTF-8 encoding, without a byte-order mark.
using (StreamWriter sw = new StreamWriter(pathName, false, new UTF8Encoding(false))) { using (StreamWriter sw = new StreamWriter(pathName, false, new UTF8Encoding(false))) {

View File

@ -332,11 +332,12 @@ namespace SourceGen.AsmGen {
/// This is necessary for assemblers like 64tass that use a leading underscore to /// This is necessary for assemblers like 64tass that use a leading underscore to
/// indicate that a label should be local. /// indicate that a label should be local.
/// ///
/// This may be called even if label localization is disabled. In that case we just
/// create an empty label map and populate as needed.
///
/// Only call this if underscores are used to indicate local labels. /// Only call this if underscores are used to indicate local labels.
/// </summary> /// </summary>
/// <remarks>
/// This may be called even if label localization is disabled. In that case we just
/// create an empty label map and populate as needed.
/// </remarks>
public void MaskLeadingUnderscores() { public void MaskLeadingUnderscores() {
bool allGlobal = false; bool allGlobal = false;
if (LabelMap == null) { if (LabelMap == null) {
@ -383,8 +384,9 @@ namespace SourceGen.AsmGen {
// Make sure it's unique. // Make sure it's unique.
string uniqueLabel = newLabel; string uniqueLabel = newLabel;
int uval = 1; int uval = 0;
while (allLabels.ContainsKey(uniqueLabel)) { while (allLabels.ContainsKey(uniqueLabel)) {
uval++;
uniqueLabel = newLabel + uval.ToString(); uniqueLabel = newLabel + uval.ToString();
} }
allLabels.Add(uniqueLabel, uniqueLabel); allLabels.Add(uniqueLabel, uniqueLabel);
@ -397,5 +399,91 @@ namespace SourceGen.AsmGen {
Debug.WriteLine("UMAP: allcount=" + allLabels.Count + " mapcount=" + LabelMap.Count); Debug.WriteLine("UMAP: allcount=" + allLabels.Count + " mapcount=" + LabelMap.Count);
} }
/// <summary>
/// Remaps labels that match opcode names.  Updated names will be added to LabelMap.
/// This should be run after localization and underscore concealment have finished.
/// </summary>
/// <remarks>
/// Most assemblers don't like it if you create a label with the same name as an
/// opcode, e.g. "jmp LSR" doesn't work.  We can use the label map to work around
/// the issue.
///
/// Most assemblers regard mnemonics as case-insensitive, even if labels are
/// case-sensitive, so we want to remap both "lsr" and "LSR".
///
/// This doesn't really have anything to do with label localization other than that
/// we're updating the label remap table.
///
/// If LabelMap is null on entry, a map is created for the duration of the call.  If
/// the map is empty when the call finishes, LabelMap is set back to null.
/// </remarks>
public void FixOpcodeLabels() {
    if (LabelMap == null) {
        LabelMap = new Dictionary<string, string>();
    }

    // Create a searchable set of opcode names using the current CPU definition.
    // (All tested assemblers that failed on opcode names only did so for names
    // that were part of the current definition, e.g. "TSB" was accepted as a label
    // when the CPU was set to 6502.)
    //
    // Names are stored in upper case so the comparison below is case-insensitive.
    // There may be multiple entries with the same name (e.g. "NOP"); the set simply
    // collapses them.
    HashSet<string> opnames = new HashSet<string>();
    Asm65.CpuDef cpuDef = mProject.CpuDef;
    for (int i = 0; i < 256; i++) {
        opnames.Add(cpuDef.GetOpDef(i).Mnemonic.ToUpperInvariant());
    }

    // Create a set of all labels, for uniqueness testing.  If a label has been
    // remapped, we add the remapped entry instead of the original.
    // (All tested assemblers that failed on opcode names only did so for names
    // in their non-localized form.  While "LSR" failed, "@LSR", "_LSR", ".LSR", etc.
    // were accepted.  So if it was remapped by the localizer, we don't need to
    // worry about it.)
    HashSet<string> allLabels = new HashSet<string>();
    for (int i = 0; i < mProject.FileDataLength; i++) {
        Symbol sym = mProject.GetAnattrib(i).Symbol;
        if (sym == null) {
            // No label at this offset.
            continue;
        }

        if (LabelMap.TryGetValue(sym.Label, out string mapLabel) && mapLabel != null) {
            allLabels.Add(mapLabel);
        } else {
            allLabels.Add(sym.Label);
        }
    }

    // Now run through the list of labels, looking for any that match opcode
    // mnemonics.
    for (int i = 0; i < mProject.FileDataLength; i++) {
        Symbol sym = mProject.GetAnattrib(i).Symbol;
        if (sym == null) {
            // No label at this offset.
            continue;
        }

        // Compare the name that will actually be emitted: the remapped form if one
        // exists, otherwise the original label.
        string cmpLabel = sym.Label;
        if (LabelMap.TryGetValue(sym.Label, out string mapLabel) && mapLabel != null) {
            cmpLabel = mapLabel;
        }

        if (!opnames.Contains(cmpLabel.ToUpperInvariant())) {
            continue;
        }
        //Debug.WriteLine("Remapping label (op mnemonic): " + sym.Label);

        // Append "_1", "_2", ... until we find a name that isn't already in use.
        int uval = 0;
        string uniqueLabel;
        do {
            uval++;
            uniqueLabel = cmpLabel + "_" + uval.ToString();
        } while (allLabels.Contains(uniqueLabel));

        allLabels.Add(uniqueLabel);

        // Use the indexer rather than Add(): if the label was already remapped by an
        // earlier pass, sym.Label is already a key in the map, and Add() would throw
        // ArgumentException.  The indexer replaces the existing entry.
        LabelMap[sym.Label] = uniqueLabel;
    }

    if (LabelMap.Count == 0) {
        // didn't do anything, lose the table
        LabelMap = null;
    }
}
} }
} }

View File

@ -1,8 +1,8 @@
### 6502bench SourceGen dis65 v1.0 ### ### 6502bench SourceGen dis65 v1.0 ###
{ {
"_ContentVersion":1,"FileDataLength":87,"FileDataCrc32":255194945,"ProjectProps":{ "_ContentVersion":2,"FileDataLength":103,"FileDataCrc32":1381810255,"ProjectProps":{
"CpuName":"65816","IncludeUndocumentedInstr":false,"EntryFlags":32702671,"AnalysisParams":{ "CpuName":"65C02","IncludeUndocumentedInstr":false,"EntryFlags":32702671,"AutoLabelStyle":"Simple","AnalysisParams":{
"AnalyzeUncategorizedData":true,"MinCharsForString":4,"SeekNearbyTargets":true}, "AnalyzeUncategorizedData":true,"DefaultTextScanMode":"LowHighAscii","MinCharsForString":4,"SeekNearbyTargets":true,"SmartPlpHandling":true},
"PlatformSymbolFileIdentifiers":[],"ExtensionScriptFileIdentifiers":[],"ProjectSyms":{ "PlatformSymbolFileIdentifiers":[],"ExtensionScriptFileIdentifiers":[],"ProjectSyms":{
"__ENABLE_LABEL_LOCALIZATION":{ "__ENABLE_LABEL_LOCALIZATION":{
"DataDescriptor":{ "DataDescriptor":{
@ -21,7 +21,7 @@
"Low":0,"High":0,"Hint":"Code"}],"StatusFlagOverrides":{ "Low":0,"High":0,"Hint":"Code"}],"StatusFlagOverrides":{
}, },
"Comments":{ "Comments":{
}, "92":"local","94":"global"},
"LongComments":{ "LongComments":{
}, },
"Notes":{ "Notes":{
@ -64,7 +64,25 @@
"14":{ "14":{
"Label":"__nopped","Value":4110,"Source":"User","Type":"LocalOrGlobalAddr"}, "Label":"__nopped","Value":4110,"Source":"User","Type":"LocalOrGlobalAddr"},
"13":{ "13":{
"Label":"start","Value":4109,"Source":"User","Type":"LocalOrGlobalAddr"}}, "Label":"start","Value":4109,"Source":"User","Type":"LocalOrGlobalAddr"},
"86":{
"Label":"_uname","Value":4182,"Source":"User","Type":"LocalOrGlobalAddr"},
"87":{
"Label":"X_uname11","Value":4183,"Source":"User","Type":"LocalOrGlobalAddr"},
"88":{
"Label":"X_uname1","Value":4184,"Source":"User","Type":"LocalOrGlobalAddr"},
"92":{
"Label":"AND","Value":4188,"Source":"User","Type":"LocalOrGlobalAddr"},
"94":{
"Label":"JMP","Value":4190,"Source":"User","Type":"GlobalAddr"},
"96":{
"Label":"jmp","Value":4192,"Source":"User","Type":"GlobalAddr"},
"98":{
"Label":"TSB","Value":4194,"Source":"User","Type":"GlobalAddr"},
"89":{
"Label":"X_uname","Value":4185,"Source":"User","Type":"LocalOrGlobalAddr"},
"100":{
"Label":"XCE","Value":4196,"Source":"User","Type":"GlobalAddr"}},
"OperandFormats":{ "OperandFormats":{
"23":{ "23":{
"Length":3,"Format":"NumericLE","SubFormat":"Symbol","SymbolRef":{ "Length":3,"Format":"NumericLE","SubFormat":"Symbol","SymbolRef":{
@ -76,4 +94,6 @@
"Length":3,"Format":"NumericLE","SubFormat":"Symbol","SymbolRef":{ "Length":3,"Format":"NumericLE","SubFormat":"Symbol","SymbolRef":{
"Label":"_reach4","Part":"Low"}}, "Label":"_reach4","Part":"Low"}},
"58":{ "58":{
"Length":3,"Format":"NumericLE","SubFormat":"Hex","SymbolRef":null}}} "Length":3,"Format":"NumericLE","SubFormat":"Hex","SymbolRef":null}},
"LvTables":{
}}

View File

@ -1,9 +1,7 @@
.cpu "65816" .cpu "65c02"
REALLYLONGLABELNAME = $8888 ;that's a long name REALLYLONGLABELNAME = $8888 ;that's a long name
* = $1000 * = $1000
.as
.xs
nop nop
X_start lda _start X_start lda _start
lda X_start1 lda X_start1
@ -56,5 +54,19 @@ globlat jsr nglobal
end nop end nop
EXCESSIVELY_LONG_LABEL EXCESSIVELY_LONG_LABEL
lda REALLYLONGLABELNAME lda REALLYLONGLABELNAME
_X_uname
nop
_X_uname11
nop
_X_uname1
nop
_X_uname2
nop
lda #$00
_AND bne _AND ;local
JMP_1 bne JMP_1 ;global
jmp_1 bne jmp_1
TSB_1 bne TSB_1
XCE bne XCE
rts rts

View File

@ -49,5 +49,17 @@ globlat jsr nglobal
end nop end nop
EXCESSIVELY_LONG_LABEL EXCESSIVELY_LONG_LABEL
lda REALLYLONGLABELNAME lda REALLYLONGLABELNAME
:_uname nop
:X_uname11
nop
:X_uname1
nop
:X_uname nop
lda #$00
:AND bne :AND ;local
JMP bne JMP ;global
jmp bne jmp
TSB bne TSB
XCE bne XCE
rts rts

View File

@ -1,9 +1,7 @@
!cpu 65816 !cpu 65c02
REALLYLONGLABELNAME = $8888 ;that's a long name REALLYLONGLABELNAME = $8888 ;that's a long name
* = $1000 * = $1000
!as
!rs
nop nop
_start lda @start _start lda @start
lda X_start lda X_start
@ -55,5 +53,18 @@ globlat jsr nglobal
end nop end nop
EXCESSIVELY_LONG_LABEL EXCESSIVELY_LONG_LABEL
lda REALLYLONGLABELNAME lda REALLYLONGLABELNAME
@_uname nop
@X_uname11
nop
@X_uname1
nop
@X_uname
nop
lda #$00
@AND bne @AND ;local
JMP_1 bne JMP_1 ;global
jmp_1 bne jmp_1
TSB_1 bne TSB_1
XCE bne XCE
rts rts

View File

@ -1,10 +1,8 @@
.setcpu "65816" .setcpu "65C02"
REALLYLONGLABELNAME = $8888 ;that's a long name REALLYLONGLABELNAME = $8888 ;that's a long name
; .segment "SEG000" ; .segment "SEG000"
.org $1000 .org $1000
.a8
.i8
nop nop
_start: lda @start _start: lda @start
lda X_start lda X_start
@ -56,5 +54,18 @@ globlat: jsr nglobal
end: nop end: nop
EXCESSIVELY_LONG_LABEL: EXCESSIVELY_LONG_LABEL:
lda REALLYLONGLABELNAME lda REALLYLONGLABELNAME
@_uname: nop
@X_uname11:
nop
@X_uname1:
nop
@X_uname:
nop
lda #$00
@AND: bne @AND ;local
JMP_1: bne JMP_1 ;global
jmp_1: bne jmp_1
TSB_1: bne TSB_1
XCE: bne XCE
rts rts

View File

@ -1,7 +1,7 @@
# 6502bench SourceGen generated linker script for 2012-label-localizer # 6502bench SourceGen generated linker script for 2012-label-localizer
MEMORY { MEMORY {
MAIN: file=%O, start=%S, size=65536; MAIN: file=%O, start=%S, size=65536;
# MEM000: file=%O, start=$1000, size=87; # MEM000: file=%O, start=$1000, size=103;
} }
SEGMENTS { SEGMENTS {
CODE: load=MAIN, type=rw; CODE: load=MAIN, type=rw;

View File

@ -3,6 +3,8 @@
; ;
; Assembler: Merlin 32 ; Assembler: Merlin 32
; NOTE: select CPU=65C02
EXTSYM equ $8888 ;EDIT: give this a long label EXTSYM equ $8888 ;EDIT: give this a long label
org $1000 org $1000
@ -62,4 +64,20 @@ globlat jsr nglobal ;EDIT: mark label as global
bra cont bra cont
cont nop ;EDIT: mark label as global cont nop ;EDIT: mark label as global
lda EXTSYM lda EXTSYM
; test localizer uniquification
; only relevant for 64tass, which uses a leading '_' to indicate local labels
_uname nop
X_uname11 nop
X_uname1 nop
X_uname nop
; test labels with opcode names (not really a localization thing)
; EDIT: set these names, mark as global to prevent localization
lda #$00
ANDl bne ANDl ;leave label local
JMPg bne JMPg ;make label global
jmpg bne jmpg ;lower case name
TSBg bne TSBg
XCEg bne XCEg ;should be allowed
rts rts