From c80be07f73dbc1332b7cf1445d701f69319060e3 Mon Sep 17 00:00:00 2001
From: Andy McFadden
Date: Fri, 2 Nov 2018 13:49:27 -0700
Subject: [PATCH] Work around Merlin 32 instruction parsing bug
The 2014-label-dp test now passes. Prior regression tests are
unaffected.
Also, renamed an IGenerator interface to more accurately reflect
its role.
(issue #37)
---
SourceGen/AsmGen/AsmCc65.cs | 9 +-
SourceGen/AsmGen/AsmMerlin32.cs | 28 +-
SourceGen/AsmGen/AsmTass64.cs | 3 +-
SourceGen/AsmGen/GenCommon.cs | 2 +-
SourceGen/AsmGen/IGenerator.cs | 13 +-
SourceGen/RuntimeData/Help/codegen.html | 23 +-
.../Expected/2014-label-dp_Merlin32.S | 289 ++++++++++++++++++
SourceGen/Symbol.cs | 13 +
8 files changed, 356 insertions(+), 24 deletions(-)
create mode 100644 SourceGen/SGTestData/Expected/2014-label-dp_Merlin32.S
diff --git a/SourceGen/AsmGen/AsmCc65.cs b/SourceGen/AsmGen/AsmCc65.cs
index db20bac..1819012 100644
--- a/SourceGen/AsmGen/AsmCc65.cs
+++ b/SourceGen/AsmGen/AsmCc65.cs
@@ -259,11 +259,12 @@ namespace SourceGen.AsmGen {
}
///
- /// Map the mnemonics we chose for undocumented opcodes to the cc65 mnemonics.
- /// After switching to the Unintended Opcodes mnemonics there's almost no difference.
+ /// Map the undocumented opcodes to the cc65 mnemonics. There's almost no difference
+ /// vs. the Unintended Opcodes mnemonics.
///
/// We don't include the double- and triple-byte NOPs here, as cc65 doesn't
- /// appear to have a definition for them (as of 2.17).
+ /// appear to have a definition for them (as of 2.17). We also omit the alias
+ /// for SBC. These will all be output as hex.
///
private static Dictionary sUndocMap = new Dictionary() {
{ OpName.ALR, "alr" }, // imm 0x4b
@@ -288,7 +289,7 @@ namespace SourceGen.AsmGen {
};
// IGenerator
- public string ReplaceMnemonic(OpDef op) {
+ public string ModifyOpcode(int offset, OpDef op) {
if ((op == OpDef.OpWDM_WDM || op == OpDef.OpBRK_StackInt) && mAsmVersion <= V2_17) {
// cc65 v2.17 doesn't support WDM, and assembles BRK to opcode $05.
// https://github.com/cc65/cc65/issues/715
diff --git a/SourceGen/AsmGen/AsmMerlin32.cs b/SourceGen/AsmGen/AsmMerlin32.cs
index 9414c2f..0b0d2fe 100644
--- a/SourceGen/AsmGen/AsmMerlin32.cs
+++ b/SourceGen/AsmGen/AsmMerlin32.cs
@@ -328,12 +328,34 @@ namespace SourceGen.AsmGen {
}
// IGenerator
- public string ReplaceMnemonic(OpDef op) {
+ public string ModifyOpcode(int offset, OpDef op) {
if (op.IsUndocumented) {
return null;
- } else {
- return string.Empty;
}
+
+ // The assembler works correctly if the symbol is defined as a two-digit hex
+ // value (e.g. "foo equ $80") but fails if it's four (e.g. "foo equ $0080"). We
+ // output symbols with minimal digits, but we have no control over the width of
+ // labels on direct-page locations inside the project. So if the operand refers
+ // to an internal label (auto or user), we output the instruction as hex.
+ if (op == OpDef.OpPEI_StackDPInd ||
+ op == OpDef.OpSTY_DPIndexX ||
+ op == OpDef.OpSTX_DPIndexY ||
+ op.AddrMode == OpDef.AddressMode.DPIndLong ||
+ op.AddrMode == OpDef.AddressMode.DPInd ||
+ op.AddrMode == OpDef.AddressMode.DPIndexXInd) {
+ FormatDescriptor dfd = Project.GetAnattrib(offset).DataDescriptor;
+ if (dfd != null && dfd.HasSymbol) {
+ // It has a symbol. See if the symbol target is a label (auto or user).
+ if (Project.SymbolTable.TryGetValue(dfd.SymbolRef.Label, out Symbol sym)) {
+ if (sym.IsInternalLabel) {
+ return null;
+ }
+ }
+ }
+ }
+
+ return string.Empty;
}
// IGenerator
diff --git a/SourceGen/AsmGen/AsmTass64.cs b/SourceGen/AsmGen/AsmTass64.cs
index 4c11f38..d4bb7aa 100644
--- a/SourceGen/AsmGen/AsmTass64.cs
+++ b/SourceGen/AsmGen/AsmTass64.cs
@@ -244,7 +244,6 @@ namespace SourceGen.AsmGen {
} else if (cpuDef.Type == CpuDef.CpuType.Cpu6502 && cpuDef.HasUndocumented) {
cpuStr = "6502i";
} else {
- // 6502 def includes undocumented ops
cpuStr = "6502";
}
@@ -253,7 +252,7 @@ namespace SourceGen.AsmGen {
}
// IGenerator
- public string ReplaceMnemonic(OpDef op) {
+ public string ModifyOpcode(int offset, OpDef op) {
if (op.IsUndocumented) {
if (Project.CpuDef.Type == CpuDef.CpuType.Cpu65C02) {
// none of the "LDD" stuff is handled
diff --git a/SourceGen/AsmGen/GenCommon.cs b/SourceGen/AsmGen/GenCommon.cs
index 996b0e1..c3345e5 100644
--- a/SourceGen/AsmGen/GenCommon.cs
+++ b/SourceGen/AsmGen/GenCommon.cs
@@ -172,7 +172,6 @@ namespace SourceGen.AsmGen {
wdis = OpDef.GetWidthDisambiguation(instrLen, operand);
}
- string replMnemonic = gen.ReplaceMnemonic(op);
string opcodeStr = formatter.FormatOpcode(op, wdis);
string formattedOperand = null;
@@ -268,6 +267,7 @@ namespace SourceGen.AsmGen {
}
string commentStr = formatter.FormatEolComment(eolComment);
+ string replMnemonic = gen.ModifyOpcode(offset, op);
if (attr.Length != instrBytes) {
// This instruction has another instruction inside it. Throw out what we
// computed and just output as bytes.
diff --git a/SourceGen/AsmGen/IGenerator.cs b/SourceGen/AsmGen/IGenerator.cs
index 27fae60..6d7afbe 100644
--- a/SourceGen/AsmGen/IGenerator.cs
+++ b/SourceGen/AsmGen/IGenerator.cs
@@ -85,14 +85,14 @@ namespace SourceGen.AsmGen {
List GenerateSource(BackgroundWorker worker);
///
- /// Provides an opportunity for the assembler to replace a mnemonic with another. This
- /// is primarily intended for undocumented ops, which don't have standard mnemonics,
- /// and hence can vary between assemblers.
+ /// Provides an opportunity for the assembler to replace a mnemonic with another, or
+ /// output an instruction as hex bytes.
///
+ /// <param name="offset">Opcode offset.</param>
/// <param name="op">Opcode to replace.</param>
/// <returns>Replacement mnemonic, an empty string if the original is fine, or
- /// null if the op is not supported at all and should be emitted as hex.</returns>
- string ReplaceMnemonic(OpDef op);
+ /// null if the op is unsupported or broken and should be emitted as hex.</returns>
+ string ModifyOpcode(int offset, OpDef op);
///
/// Generates an opcode/operand pair for a short sequence of bytes (1-4 bytes).
@@ -163,6 +163,9 @@ namespace SourceGen.AsmGen {
void OutputLine(string fullLine);
}
+ ///
+ /// Enumeration of quirky or buggy behavior that GenCommon needs to handle.
+ ///
public class AssemblerQuirks {
///
/// Are the arguments to MVN/MVP reversed?
diff --git a/SourceGen/RuntimeData/Help/codegen.html b/SourceGen/RuntimeData/Help/codegen.html
index 1052a5f..71da80d 100644
--- a/SourceGen/RuntimeData/Help/codegen.html
+++ b/SourceGen/RuntimeData/Help/codegen.html
@@ -148,8 +148,8 @@ code, but also needs to know how to handle the corner cases.
as case-sensitive. The --case-sensitive
must be passed to
the assembler.
If you set the --case-sensitive
flag, all opcodes
- and operands must be lower-case. Most of the flags used to show
- things in upper case must be disabled.
+ and operands must be lower-case. Most of the SourceGen options used to
+ show things in upper case must be disabled.
For 65816, selecting the bank byte is done with the back-quote ('`')
rather than the caret ('^'). (There's a note in the docs to the effect
that they plan to move to carets.)
@@ -166,7 +166,7 @@ code, but also needs to know how to handle the corner cases.
PC relative branches don't wrap around at bank boundaries.
BRK <arg> is assembled to opcode $05 rather than $00.
WDM is not supported.
- Source file names must not have spaces in them on Windows.
+ Source file names may not have spaces in them on Windows.
Quirks:
@@ -181,10 +181,11 @@ code, but also needs to know how to handle the corner cases.
Undocumented opcodes: SBX ($cb) uses the mnemonic AXS. All other
opcodes match up with the "unintended opcodes" document.
ca65 is implemented as a single-pass assembler, so label widths
- can't always be known in time. For example, if you .ORG $0000 after
- the point where the labels are used, the assembler will already have
- generated them as absolute values. Width disambiguation must be applied
- to instructions that aren't ambiguous to multi-pass assemblers.
+ can't always be known in time. For example, if you use some zero-page
+ labels, but they're defined via .ORG $0000 after the point where the
+ labels are used, the assembler will already have generated them as
+ absolute values. Width disambiguation must be applied to operands
+ that wouldn't be ambiguous to a multi-pass assembler.
The assembler is geared toward generating relocatable code with
multiple segments (it is, after all, an assembler for a C compiler).
A linker script is expected to be provided for anything complex. Since
@@ -201,8 +202,12 @@ code, but also needs to know how to handle the corner cases.
- PC relative branches don't wrap around at bank boundaries.
- For some failures, an exit code of zero is returned.
- - Some indexed store instructions cause errors if the label isn't
- unambiguously DP (e.g. `STX $00,X` vs. `STX $0000,X`).
+ - Some DP indexed store instructions cause errors if the label isn't
+ unambiguously DP (e.g. <code>STX $00,X</code> vs.
+ <code>STX $0000,X</code>). This isn't a problem with project/platform
+ symbols, which are output as two-digit hex values when possible, but
+ causes failures when direct page locations are included in the project
+ and given labels.
Quirks:
diff --git a/SourceGen/SGTestData/Expected/2014-label-dp_Merlin32.S b/SourceGen/SGTestData/Expected/2014-label-dp_Merlin32.S
new file mode 100644
index 0000000..8f44ad3
--- /dev/null
+++ b/SourceGen/SGTestData/Expected/2014-label-dp_Merlin32.S
@@ -0,0 +1,289 @@
+;6502bench SourceGen v1.1.0-dev1
+ org $1000
+ sec
+ xce
+ jsr L101F
+ jsr L10AB
+ jsr L10F2
+ jsr L1106
+ jsr L1109
+ jsr L112C
+ jsr L11F9
+ jsr L11FC
+ nop
+ nop
+ nop
+ brk $80
+
+L101F dfb $01,$80
+ cop $80
+ ora $80,S
+ tsb L0080
+ ora L0080
+ asl L0080
+ dfb $07,$80
+ php
+ ora #$80
+ asl A
+ phd
+ tsb: L0086
+ ora: L0086
+ asl: L0086
+ oral L0089
+ bpl L1041
+L1041 ora (L0080),y
+ dfb $12,$80
+ ora ($80,S),y
+ trb L0080
+ ora L0080,x
+ asl L0080,x
+ ora [L0080],y
+ clc
+ ora L0086,y
+ inc A
+ tcs
+ trb: L0086
+ ora: L0086,x
+ asl: L0086,x
+ oral L0089,x
+ jsr L0086
+ dfb $21,$80
+ jsl L0089
+ and $80,S
+ bit L0080
+ and L0080
+ rol L0080
+ dfb $27,$80
+ plp
+ and #$80
+ rol A
+ pld
+ bit: L0086
+ and: L0086
+ rol: L0086
+ andl L0089
+ bmi L1089
+L1089 and (L0080),y
+ dfb $32,$80
+ and ($80,S),y
+ bit L0080,x
+ and L0080,x
+ rol L0080,x
+ and [L0080],y
+ sec
+ and L0086,y
+ dec A
+ tsc
+ bit: L0086,x
+ and: L0086,x
+ rol: L0086,x
+ andl L0089,x
+ rti
+
+L10AB dfb $41,$80
+ wdm $80
+ eor $80,S
+ mvp $84,$83
+ eor L0080
+ lsr L0080
+ dfb $47,$80
+ pha
+ eor #$80
+ lsr A
+ phk
+ jmp L10C2
+
+L10C2 eor: L0086
+ lsr: L0086
+ eorl L0089
+ bvc L10CE
+L10CE eor (L0080),y
+ dfb $52,$80
+ eor ($80,S),y
+ mvn $84,$83
+ eor L0080,x
+ lsr L0080,x
+ eor [L0080],y
+ cli
+ eor L0086,y
+ phy
+ tcd
+ jml L10E7
+
+L10E7 eor: L0086,x
+ lsr: L0086,x
+ eorl L0089,x
+ rts
+
+L10F2 dfb $61,$80
+ per $0ff6
+ adc $80,S
+ stz L0080
+ adc L0080
+ ror L0080
+ dfb $67,$80
+ pla
+ adc #$80
+ ror A
+ rtl
+
+L1106 jmp (L0086)
+
+L1109 adc: L0086
+ ror: L0086
+ adcl L0089
+ bvs L1115
+L1115 adc (L0080),y
+ dfb $72,$80
+ adc ($80,S),y
+ stz L0080,x
+ adc L0080,x
+ ror L0080,x
+ adc [L0080],y
+ sei
+ adc L0086,y
+ ply
+ tdc
+ jmp (L0086,x)
+
+L112C adc: L0086,x
+ ror: L0086,x
+ adcl L0089,x
+ bra L1138
+
+L1138 dfb $81,$80
+ brl L113D
+
+L113D sta $80,S
+ sty L0080
+ sta L0080
+ stx L0080
+ dfb $87,$80
+ dey
+ bit #$80
+ txa
+ phb
+ sty: L0086
+ sta: L0086
+ stx: L0086
+ stal L0089
+ bcc L115B
+L115B sta (L0080),y
+ dfb $92,$80
+ sta ($80,S),y
+ dfb $94,$80
+ sta L0080,x
+ dfb $96,$80
+ sta [L0080],y
+ tya
+ sta L0086,y
+ txs
+ txy
+ stz: L0086
+ sta: L0086,x
+ stz: L0086,x
+ stal L0089,x
+ ldy #$80
+ dfb $a1,$80
+ ldx #$80
+ lda $80,S
+ ldy L0080
+ lda L0080
+ ldx L0080
+ dfb $a7,$80
+ tay
+ lda #$80
+ tax
+ plb
+ ldy: L0086
+ lda: L0086
+ ldx: L0086
+ ldal L0089
+ bcs L11A0
+L11A0 lda (L0080),y
+ dfb $b2,$80
+ lda ($80,S),y
+ ldy L0080,x
+ lda L0080,x
+ ldx L0080,y
+ lda [L0080],y
+ clv
+ lda L0086,y
+ tsx
+ tyx
+ ldy: L0086,x
+ lda: L0086,x
+ ldx: L0086,y
+ ldal L0089,x
+ cpy #$80
+ dfb $c1,$80
+ rep #$00
+ cmp $80,S
+ cpy L0080
+ cmp L0080
+ dec L0080
+ dfb $c7,$80
+ iny
+ cmp #$80
+ dex
+ wai
+ cpy: L0086
+ cmp: L0086
+ dec: L0086
+ cmpl L0089
+ bne L11E5
+L11E5 cmp (L0080),y
+ dfb $d2,$80
+ cmp ($80,S),y
+ dfb $d4,$80
+ cmp L0080,x
+ dec L0080,x
+ cmp [L0080],y
+ cld
+ cmp L0086,y
+ phx
+ stp
+
+L11F9 jml [L0086]
+
+L11FC cmp: L0086,x
+ dec: L0086,x
+ cmpl L0089,x
+ cpx #$80
+ dfb $e1,$80
+ sep #$00
+ sbc $80,S
+ cpx L0080
+ sbc L0080
+ inc L0080
+ dfb $e7,$80
+ inx
+ sbc #$80
+ nop
+ xba
+ cpx: L0086
+ sbc: L0086
+ inc: L0086
+ sbcl L0089
+ beq L122A
+L122A sbc (L0080),y
+ dfb $f2,$80
+ sbc ($80,S),y
+ pea L0086
+ sbc L0080,x
+ inc L0080,x
+ sbc [L0080],y
+ sed
+ sbc L0086,y
+ plx
+ xce
+ jsr (L0086,x)
+ sbc: L0086,x
+ inc: L0086,x
+ sbcl L0089,x
+ org $0080
+L0080 bit L0082
+L0082 bit L0082
+ bit L0082
+L0086 bit: L0086
+L0089 ldal L0089
diff --git a/SourceGen/Symbol.cs b/SourceGen/Symbol.cs
index c4f5e58..3ec391d 100644
--- a/SourceGen/Symbol.cs
+++ b/SourceGen/Symbol.cs
@@ -48,6 +48,18 @@ namespace SourceGen {
Constant // constant value
}
+ /// <summary>
+ /// Returns true if the symbol's type is an internal label (auto or user). Returns
+ /// false for external addresses and constants.
+ /// </summary>
+ public bool IsInternalLabel {
+ get {
+ return SymbolType == Type.LocalOrGlobalAddr ||
+ SymbolType == Type.GlobalAddr ||
+ SymbolType == Type.GlobalAddrExport;
+ }
+ }
+
///
/// Label sent to assembler.
@@ -74,6 +86,7 @@ namespace SourceGen {
///
public string SourceTypeString { get; private set; }
+
// No nullary constructor.
private Symbol() { }