mirror of
https://github.com/fadden/6502bench.git
synced 2024-12-28 01:29:29 +00:00
Work around Merlin 32 instruction parsing bug
The 2014-label-dp test now passes. Prior regression tests are unaffected. Also, renamed an IGenerator interface method (ReplaceMnemonic is now ModifyOpcode) to more accurately reflect its role. (issue #37)
This commit is contained in:
parent
2096bd2c66
commit
c80be07f73
@ -259,11 +259,12 @@ namespace SourceGen.AsmGen {
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Map the mnemonics we chose for undocumented opcodes to the cc65 mnemonics.
|
||||
/// After switching to the Unintended Opcodes mnemonics there's almost no difference.
|
||||
/// Map the undocumented opcodes to the cc65 mnemonics. There's almost no difference
|
||||
/// vs. the Unintended Opcodes mnemonics.
|
||||
///
|
||||
/// We don't include the double- and triple-byte NOPs here, as cc65 doesn't
|
||||
/// appear to have a definition for them (as of 2.17).
|
||||
/// appear to have a definition for them (as of 2.17). We also omit the alias
|
||||
/// for SBC. These will all be output as hex.
|
||||
/// </summary>
|
||||
private static Dictionary<string, string> sUndocMap = new Dictionary<string, string>() {
|
||||
{ OpName.ALR, "alr" }, // imm 0x4b
|
||||
@ -288,7 +289,7 @@ namespace SourceGen.AsmGen {
|
||||
};
|
||||
|
||||
// IGenerator
|
||||
public string ReplaceMnemonic(OpDef op) {
|
||||
public string ModifyOpcode(int offset, OpDef op) {
|
||||
if ((op == OpDef.OpWDM_WDM || op == OpDef.OpBRK_StackInt) && mAsmVersion <= V2_17) {
|
||||
// cc65 v2.17 doesn't support WDM, and assembles BRK <arg> to opcode $05.
|
||||
// https://github.com/cc65/cc65/issues/715
|
||||
|
@ -328,12 +328,34 @@ namespace SourceGen.AsmGen {
|
||||
}
|
||||
|
||||
// IGenerator
|
||||
public string ReplaceMnemonic(OpDef op) {
|
||||
public string ModifyOpcode(int offset, OpDef op) {
|
||||
if (op.IsUndocumented) {
|
||||
return null;
|
||||
} else {
|
||||
return string.Empty;
|
||||
}
|
||||
|
||||
// The assembler works correctly if the symbol is defined as a two-digit hex
|
||||
// value (e.g. "foo equ $80") but fails if it's four (e.g. "foo equ $0080"). We
|
||||
// output symbols with minimal digits, but we have no control over labels when
|
||||
// the code has a zero-page EQU. So if the operand is a reference to a user
|
||||
// label, we need to output the instruction as hex.
|
||||
if (op == OpDef.OpPEI_StackDPInd ||
|
||||
op == OpDef.OpSTY_DPIndexX ||
|
||||
op == OpDef.OpSTX_DPIndexY ||
|
||||
op.AddrMode == OpDef.AddressMode.DPIndLong ||
|
||||
op.AddrMode == OpDef.AddressMode.DPInd ||
|
||||
op.AddrMode == OpDef.AddressMode.DPIndexXInd) {
|
||||
FormatDescriptor dfd = Project.GetAnattrib(offset).DataDescriptor;
|
||||
if (dfd != null && dfd.HasSymbol) {
|
||||
// It has a symbol. See if the symbol target is a label (auto or user).
|
||||
if (Project.SymbolTable.TryGetValue(dfd.SymbolRef.Label, out Symbol sym)) {
|
||||
if (sym.IsInternalLabel) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return string.Empty;
|
||||
}
|
||||
|
||||
// IGenerator
|
||||
|
@ -244,7 +244,6 @@ namespace SourceGen.AsmGen {
|
||||
} else if (cpuDef.Type == CpuDef.CpuType.Cpu6502 && cpuDef.HasUndocumented) {
|
||||
cpuStr = "6502i";
|
||||
} else {
|
||||
// 6502 def includes undocumented ops
|
||||
cpuStr = "6502";
|
||||
}
|
||||
|
||||
@ -253,7 +252,7 @@ namespace SourceGen.AsmGen {
|
||||
}
|
||||
|
||||
// IGenerator
|
||||
public string ReplaceMnemonic(OpDef op) {
|
||||
public string ModifyOpcode(int offset, OpDef op) {
|
||||
if (op.IsUndocumented) {
|
||||
if (Project.CpuDef.Type == CpuDef.CpuType.Cpu65C02) {
|
||||
// none of the "LDD" stuff is handled
|
||||
|
@ -172,7 +172,6 @@ namespace SourceGen.AsmGen {
|
||||
wdis = OpDef.GetWidthDisambiguation(instrLen, operand);
|
||||
}
|
||||
|
||||
string replMnemonic = gen.ReplaceMnemonic(op);
|
||||
string opcodeStr = formatter.FormatOpcode(op, wdis);
|
||||
|
||||
string formattedOperand = null;
|
||||
@ -268,6 +267,7 @@ namespace SourceGen.AsmGen {
|
||||
}
|
||||
string commentStr = formatter.FormatEolComment(eolComment);
|
||||
|
||||
string replMnemonic = gen.ModifyOpcode(offset, op);
|
||||
if (attr.Length != instrBytes) {
|
||||
// This instruction has another instruction inside it. Throw out what we
|
||||
// computed and just output as bytes.
|
||||
|
@ -85,14 +85,14 @@ namespace SourceGen.AsmGen {
|
||||
List<string> GenerateSource(BackgroundWorker worker);
|
||||
|
||||
/// <summary>
|
||||
/// Provides an opportunity for the assembler to replace a mnemonic with another. This
|
||||
/// is primarily intended for undocumented ops, which don't have standard mnemonics,
|
||||
/// and hence can vary between assemblers.
|
||||
/// Provides an opportunity for the assembler to replace a mnemonic with another, or
|
||||
/// output an instruction as hex bytes.
|
||||
/// </summary>
|
||||
/// <param name="offset">Opcode offset.</param>
|
||||
/// <param name="op">Opcode to replace.</param>
|
||||
/// <returns>Replacement mnemonic, an empty string if the original is fine, or
|
||||
/// null if the op is not supported at all and should be emitted as hex.</returns>
|
||||
string ReplaceMnemonic(OpDef op);
|
||||
/// null if the op is unsupported or broken and should be emitted as hex.</returns>
|
||||
string ModifyOpcode(int offset, OpDef op);
|
||||
|
||||
/// <summary>
|
||||
/// Generates an opcode/operand pair for a short sequence of bytes (1-4 bytes).
|
||||
@ -163,6 +163,9 @@ namespace SourceGen.AsmGen {
|
||||
void OutputLine(string fullLine);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Enumeration of quirky or buggy behavior that GenCommon needs to handle.
|
||||
/// </summary>
|
||||
public class AssemblerQuirks {
|
||||
/// <summary>
|
||||
/// Are the arguments to MVN/MVP reversed?
|
||||
|
@ -148,8 +148,8 @@ code, but also needs to know how to handle the corner cases.</p>
|
||||
as case-sensitive. The <code>--case-sensitive</code> flag must be passed to
|
||||
the assembler.</li>
|
||||
<li>If you set the <code>--case-sensitive</code> flag, <b>all</b> opcodes
|
||||
and operands must be lower-case. Most of the flags used to show
|
||||
things in upper case must be disabled.</li>
|
||||
and operands must be lower-case. Most of the SourceGen options used to
|
||||
show things in upper case must be disabled.</li>
|
||||
<li>For 65816, selecting the bank byte is done with the back-quote ('`')
|
||||
rather than the caret ('^'). (There's a note in the docs to the effect
|
||||
that they plan to move to carets.)</li>
|
||||
@ -166,7 +166,7 @@ code, but also needs to know how to handle the corner cases.</p>
|
||||
<li>PC relative branches don't wrap around at bank boundaries.</li>
|
||||
<li>BRK <arg> is assembled to opcode $05 rather than $00.</li>
|
||||
<li>WDM is not supported.</li>
|
||||
<li>Source file names must not have spaces in them on Windows.</li>
|
||||
<li>Source file names may not have spaces in them on Windows.</li>
|
||||
</ul>
|
||||
|
||||
<p>Quirks:</p>
|
||||
@ -181,10 +181,11 @@ code, but also needs to know how to handle the corner cases.</p>
|
||||
<li>Undocumented opcodes: SBX ($cb) uses the mnemonic AXS. All other
|
||||
opcodes match up with the "unintended opcodes" document.</li>
|
||||
<li>ca65 is implemented as a single-pass assembler, so label widths
|
||||
can't always be known in time. For example, if you .ORG $0000 after
|
||||
the point where the labels are used, the assembler will already have
|
||||
generated them as absolute values. Width disambiguation must be applied
|
||||
to instructions that aren't ambiguous to multi-pass assemblers.</li>
|
||||
can't always be known in time. For example, if you use some zero-page
|
||||
labels, but they're defined via .ORG $0000 after the point where the
|
||||
labels are used, the assembler will already have generated them as
|
||||
absolute values. Width disambiguation must be applied to operands
|
||||
that wouldn't be ambiguous to a multi-pass assembler.</li>
|
||||
<li>The assembler is geared toward generating relocatable code with
|
||||
multiple segments (it is, after all, an assembler for a C compiler).
|
||||
A linker script is expected to be provided for anything complex. Since
|
||||
@ -201,8 +202,12 @@ code, but also needs to know how to handle the corner cases.</p>
|
||||
<ul>
|
||||
<li>PC relative branches don't wrap around at bank boundaries.</li>
|
||||
<li>For some failures, an exit code of zero is returned.</li>
|
||||
<li>Some indexed store instructions cause errors if the label isn't
|
||||
unambiguously DP (e.g. `STX $00,X` vs. `STX $0000,X`).</li>
|
||||
<li>Some DP indexed store instructions cause errors if the label isn't
|
||||
unambiguously DP (e.g. <code>STX $00,X</code> vs.
|
||||
<code>STX $0000,X</code>). This isn't a problem with project/platform
|
||||
symbols, which are output as two-digit hex values when possible, but
|
||||
causes failures when direct page locations are included in the project
|
||||
and given labels.</li>
|
||||
</ul>
|
||||
|
||||
<p>Quirks:</p>
|
||||
|
289
SourceGen/SGTestData/Expected/2014-label-dp_Merlin32.S
Normal file
289
SourceGen/SGTestData/Expected/2014-label-dp_Merlin32.S
Normal file
@ -0,0 +1,289 @@
|
||||
;6502bench SourceGen v1.1.0-dev1
|
||||
org $1000
|
||||
sec
|
||||
xce
|
||||
jsr L101F
|
||||
jsr L10AB
|
||||
jsr L10F2
|
||||
jsr L1106
|
||||
jsr L1109
|
||||
jsr L112C
|
||||
jsr L11F9
|
||||
jsr L11FC
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
brk $80
|
||||
|
||||
L101F dfb $01,$80
|
||||
cop $80
|
||||
ora $80,S
|
||||
tsb L0080
|
||||
ora L0080
|
||||
asl L0080
|
||||
dfb $07,$80
|
||||
php
|
||||
ora #$80
|
||||
asl A
|
||||
phd
|
||||
tsb: L0086
|
||||
ora: L0086
|
||||
asl: L0086
|
||||
oral L0089
|
||||
bpl L1041
|
||||
L1041 ora (L0080),y
|
||||
dfb $12,$80
|
||||
ora ($80,S),y
|
||||
trb L0080
|
||||
ora L0080,x
|
||||
asl L0080,x
|
||||
ora [L0080],y
|
||||
clc
|
||||
ora L0086,y
|
||||
inc A
|
||||
tcs
|
||||
trb: L0086
|
||||
ora: L0086,x
|
||||
asl: L0086,x
|
||||
oral L0089,x
|
||||
jsr L0086
|
||||
dfb $21,$80
|
||||
jsl L0089
|
||||
and $80,S
|
||||
bit L0080
|
||||
and L0080
|
||||
rol L0080
|
||||
dfb $27,$80
|
||||
plp
|
||||
and #$80
|
||||
rol A
|
||||
pld
|
||||
bit: L0086
|
||||
and: L0086
|
||||
rol: L0086
|
||||
andl L0089
|
||||
bmi L1089
|
||||
L1089 and (L0080),y
|
||||
dfb $32,$80
|
||||
and ($80,S),y
|
||||
bit L0080,x
|
||||
and L0080,x
|
||||
rol L0080,x
|
||||
and [L0080],y
|
||||
sec
|
||||
and L0086,y
|
||||
dec A
|
||||
tsc
|
||||
bit: L0086,x
|
||||
and: L0086,x
|
||||
rol: L0086,x
|
||||
andl L0089,x
|
||||
rti
|
||||
|
||||
L10AB dfb $41,$80
|
||||
wdm $80
|
||||
eor $80,S
|
||||
mvp $84,$83
|
||||
eor L0080
|
||||
lsr L0080
|
||||
dfb $47,$80
|
||||
pha
|
||||
eor #$80
|
||||
lsr A
|
||||
phk
|
||||
jmp L10C2
|
||||
|
||||
L10C2 eor: L0086
|
||||
lsr: L0086
|
||||
eorl L0089
|
||||
bvc L10CE
|
||||
L10CE eor (L0080),y
|
||||
dfb $52,$80
|
||||
eor ($80,S),y
|
||||
mvn $84,$83
|
||||
eor L0080,x
|
||||
lsr L0080,x
|
||||
eor [L0080],y
|
||||
cli
|
||||
eor L0086,y
|
||||
phy
|
||||
tcd
|
||||
jml L10E7
|
||||
|
||||
L10E7 eor: L0086,x
|
||||
lsr: L0086,x
|
||||
eorl L0089,x
|
||||
rts
|
||||
|
||||
L10F2 dfb $61,$80
|
||||
per $0ff6
|
||||
adc $80,S
|
||||
stz L0080
|
||||
adc L0080
|
||||
ror L0080
|
||||
dfb $67,$80
|
||||
pla
|
||||
adc #$80
|
||||
ror A
|
||||
rtl
|
||||
|
||||
L1106 jmp (L0086)
|
||||
|
||||
L1109 adc: L0086
|
||||
ror: L0086
|
||||
adcl L0089
|
||||
bvs L1115
|
||||
L1115 adc (L0080),y
|
||||
dfb $72,$80
|
||||
adc ($80,S),y
|
||||
stz L0080,x
|
||||
adc L0080,x
|
||||
ror L0080,x
|
||||
adc [L0080],y
|
||||
sei
|
||||
adc L0086,y
|
||||
ply
|
||||
tdc
|
||||
jmp (L0086,x)
|
||||
|
||||
L112C adc: L0086,x
|
||||
ror: L0086,x
|
||||
adcl L0089,x
|
||||
bra L1138
|
||||
|
||||
L1138 dfb $81,$80
|
||||
brl L113D
|
||||
|
||||
L113D sta $80,S
|
||||
sty L0080
|
||||
sta L0080
|
||||
stx L0080
|
||||
dfb $87,$80
|
||||
dey
|
||||
bit #$80
|
||||
txa
|
||||
phb
|
||||
sty: L0086
|
||||
sta: L0086
|
||||
stx: L0086
|
||||
stal L0089
|
||||
bcc L115B
|
||||
L115B sta (L0080),y
|
||||
dfb $92,$80
|
||||
sta ($80,S),y
|
||||
dfb $94,$80
|
||||
sta L0080,x
|
||||
dfb $96,$80
|
||||
sta [L0080],y
|
||||
tya
|
||||
sta L0086,y
|
||||
txs
|
||||
txy
|
||||
stz: L0086
|
||||
sta: L0086,x
|
||||
stz: L0086,x
|
||||
stal L0089,x
|
||||
ldy #$80
|
||||
dfb $a1,$80
|
||||
ldx #$80
|
||||
lda $80,S
|
||||
ldy L0080
|
||||
lda L0080
|
||||
ldx L0080
|
||||
dfb $a7,$80
|
||||
tay
|
||||
lda #$80
|
||||
tax
|
||||
plb
|
||||
ldy: L0086
|
||||
lda: L0086
|
||||
ldx: L0086
|
||||
ldal L0089
|
||||
bcs L11A0
|
||||
L11A0 lda (L0080),y
|
||||
dfb $b2,$80
|
||||
lda ($80,S),y
|
||||
ldy L0080,x
|
||||
lda L0080,x
|
||||
ldx L0080,y
|
||||
lda [L0080],y
|
||||
clv
|
||||
lda L0086,y
|
||||
tsx
|
||||
tyx
|
||||
ldy: L0086,x
|
||||
lda: L0086,x
|
||||
ldx: L0086,y
|
||||
ldal L0089,x
|
||||
cpy #$80
|
||||
dfb $c1,$80
|
||||
rep #$00
|
||||
cmp $80,S
|
||||
cpy L0080
|
||||
cmp L0080
|
||||
dec L0080
|
||||
dfb $c7,$80
|
||||
iny
|
||||
cmp #$80
|
||||
dex
|
||||
wai
|
||||
cpy: L0086
|
||||
cmp: L0086
|
||||
dec: L0086
|
||||
cmpl L0089
|
||||
bne L11E5
|
||||
L11E5 cmp (L0080),y
|
||||
dfb $d2,$80
|
||||
cmp ($80,S),y
|
||||
dfb $d4,$80
|
||||
cmp L0080,x
|
||||
dec L0080,x
|
||||
cmp [L0080],y
|
||||
cld
|
||||
cmp L0086,y
|
||||
phx
|
||||
stp
|
||||
|
||||
L11F9 jml [L0086]
|
||||
|
||||
L11FC cmp: L0086,x
|
||||
dec: L0086,x
|
||||
cmpl L0089,x
|
||||
cpx #$80
|
||||
dfb $e1,$80
|
||||
sep #$00
|
||||
sbc $80,S
|
||||
cpx L0080
|
||||
sbc L0080
|
||||
inc L0080
|
||||
dfb $e7,$80
|
||||
inx
|
||||
sbc #$80
|
||||
nop
|
||||
xba
|
||||
cpx: L0086
|
||||
sbc: L0086
|
||||
inc: L0086
|
||||
sbcl L0089
|
||||
beq L122A
|
||||
L122A sbc (L0080),y
|
||||
dfb $f2,$80
|
||||
sbc ($80,S),y
|
||||
pea L0086
|
||||
sbc L0080,x
|
||||
inc L0080,x
|
||||
sbc [L0080],y
|
||||
sed
|
||||
sbc L0086,y
|
||||
plx
|
||||
xce
|
||||
jsr (L0086,x)
|
||||
sbc: L0086,x
|
||||
inc: L0086,x
|
||||
sbcl L0089,x
|
||||
org $0080
|
||||
L0080 bit L0082
|
||||
L0082 bit L0082
|
||||
bit L0082
|
||||
L0086 bit: L0086
|
||||
L0089 ldal L0089
|
@ -48,6 +48,18 @@ namespace SourceGen {
|
||||
Constant // constant value
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns true if the symbol's type is an internal label (auto or user). Returns
|
||||
/// false for external addresses and constants.
|
||||
/// </summary>
|
||||
public bool IsInternalLabel {
|
||||
get {
|
||||
return SymbolType == Type.LocalOrGlobalAddr ||
|
||||
SymbolType == Type.GlobalAddr ||
|
||||
SymbolType == Type.GlobalAddrExport;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Label sent to assembler.
|
||||
@ -74,6 +86,7 @@ namespace SourceGen {
|
||||
/// </summary>
|
||||
public string SourceTypeString { get; private set; }
|
||||
|
||||
|
||||
// No nullary constructor.
|
||||
private Symbol() { }
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user