1
0
mirror of https://github.com/fadden/6502bench.git synced 2024-07-02 04:29:28 +00:00

Work around Merlin 32 instruction parsing bug

The 2014-label-dp test now passes.  Prior regression tests are
unaffected.

Also, renamed an IGenerator interface to more accurately reflect
its role.

(issue #37)
This commit is contained in:
Andy McFadden 2018-11-02 13:49:27 -07:00
parent 2096bd2c66
commit c80be07f73
8 changed files with 356 additions and 24 deletions

View File

@ -259,11 +259,12 @@ namespace SourceGen.AsmGen {
}
/// <summary>
/// Map the mnemonics we chose for undocumented opcodes to the cc65 mnemonics.
/// After switching to the Unintended Opcodes mnemonics there's almost no difference.
/// Map the undocumented opcodes to the cc65 mnemonics. There's almost no difference
/// vs. the Unintended Opcodes mnemonics.
///
/// We don't include the double- and triple-byte NOPs here, as cc65 doesn't
/// appear to have a definition for them (as of 2.17).
/// appear to have a definition for them (as of 2.17). We also omit the alias
/// for SBC. These will all be output as hex.
/// </summary>
private static Dictionary<string, string> sUndocMap = new Dictionary<string, string>() {
{ OpName.ALR, "alr" }, // imm 0x4b
@ -288,7 +289,7 @@ namespace SourceGen.AsmGen {
};
// IGenerator
public string ReplaceMnemonic(OpDef op) {
public string ModifyOpcode(int offset, OpDef op) {
if ((op == OpDef.OpWDM_WDM || op == OpDef.OpBRK_StackInt) && mAsmVersion <= V2_17) {
// cc65 v2.17 doesn't support WDM, and assembles BRK <arg> to opcode $05.
// https://github.com/cc65/cc65/issues/715

View File

@ -328,12 +328,34 @@ namespace SourceGen.AsmGen {
}
// IGenerator
// Decides whether the instruction at "offset" can be emitted with its normal
// mnemonic.  Returns null if it must be output as hex bytes, or an empty string
// if the original mnemonic is fine.
public string ModifyOpcode(int offset, OpDef op) {
    // Undocumented opcodes are always emitted as hex.
    if (op.IsUndocumented) {
        return null;
    }

    // The assembler works correctly if the symbol is defined as a two-digit hex
    // value (e.g. "foo equ $80") but fails if it's four (e.g. "foo equ $0080").  We
    // output symbols with minimal digits, but we have no control over labels when
    // the code has a zero-page EQU.  So if the operand is a reference to an
    // internal label (auto or user), we need to output the instruction as hex.
    bool isAffectedOp =
        op == OpDef.OpPEI_StackDPInd ||
        op == OpDef.OpSTY_DPIndexX ||
        op == OpDef.OpSTX_DPIndexY ||
        op.AddrMode == OpDef.AddressMode.DPIndLong ||
        op.AddrMode == OpDef.AddressMode.DPInd ||
        op.AddrMode == OpDef.AddressMode.DPIndexXInd;
    if (isAffectedOp) {
        FormatDescriptor dfd = Project.GetAnattrib(offset).DataDescriptor;
        // Only operands that reference a symbol found in the symbol table, where
        // that symbol is an internal label, trigger the workaround.
        if (dfd != null && dfd.HasSymbol &&
                Project.SymbolTable.TryGetValue(dfd.SymbolRef.Label, out Symbol sym) &&
                sym.IsInternalLabel) {
            return null;
        }
    }

    return string.Empty;
}
// IGenerator

View File

@ -244,7 +244,6 @@ namespace SourceGen.AsmGen {
} else if (cpuDef.Type == CpuDef.CpuType.Cpu6502 && cpuDef.HasUndocumented) {
cpuStr = "6502i";
} else {
// 6502 def includes undocumented ops
cpuStr = "6502";
}
@ -253,7 +252,7 @@ namespace SourceGen.AsmGen {
}
// IGenerator
public string ReplaceMnemonic(OpDef op) {
public string ModifyOpcode(int offset, OpDef op) {
if (op.IsUndocumented) {
if (Project.CpuDef.Type == CpuDef.CpuType.Cpu65C02) {
// none of the "LDD" stuff is handled

View File

@ -172,7 +172,6 @@ namespace SourceGen.AsmGen {
wdis = OpDef.GetWidthDisambiguation(instrLen, operand);
}
string replMnemonic = gen.ReplaceMnemonic(op);
string opcodeStr = formatter.FormatOpcode(op, wdis);
string formattedOperand = null;
@ -268,6 +267,7 @@ namespace SourceGen.AsmGen {
}
string commentStr = formatter.FormatEolComment(eolComment);
string replMnemonic = gen.ModifyOpcode(offset, op);
if (attr.Length != instrBytes) {
// This instruction has another instruction inside it. Throw out what we
// computed and just output as bytes.

View File

@ -85,14 +85,14 @@ namespace SourceGen.AsmGen {
List<string> GenerateSource(BackgroundWorker worker);
/// <summary>
/// Provides an opportunity for the assembler to replace a mnemonic with another. This
/// is primarily intended for undocumented ops, which don't have standard mnemonics,
/// and hence can vary between assemblers.
/// Provides an opportunity for the assembler to replace a mnemonic with another, or
/// output an instruction as hex bytes.
/// </summary>
/// <param name="offset">Opcode offset.</param>
/// <param name="op">Opcode to replace.</param>
/// <returns>Replacement mnemonic, an empty string if the original is fine, or
/// null if the op is not supported at all and should be emitted as hex.</returns>
string ReplaceMnemonic(OpDef op);
/// null if the op is unsupported or broken and should be emitted as hex.</returns>
string ModifyOpcode(int offset, OpDef op);
/// <summary>
/// Generates an opcode/operand pair for a short sequence of bytes (1-4 bytes).
@ -163,6 +163,9 @@ namespace SourceGen.AsmGen {
void OutputLine(string fullLine);
}
/// <summary>
/// Enumeration of quirky or buggy behavior that GenCommon needs to handle.
/// </summary>
public class AssemblerQuirks {
/// <summary>
/// Are the arguments to MVN/MVP reversed?

View File

@ -148,8 +148,8 @@ code, but also needs to know how to handle the corner cases.</p>
as case-sensitive. The <code>--case-sensitive</code> must be passed to
the assembler.</li>
<li>If you set the <code>--case-sensitive</code> flag, <b>all</b> opcodes
and operands must be lower-case. Most of the flags used to show
things in upper case must be disabled.</li>
and operands must be lower-case. Most of the SourceGen options used to
show things in upper case must be disabled.</li>
<li>For 65816, selecting the bank byte is done with the back-quote ('`')
rather than the caret ('^'). (There's a note in the docs to the effect
that they plan to move to carets.)</li>
@ -166,7 +166,7 @@ code, but also needs to know how to handle the corner cases.</p>
<li>PC relative branches don't wrap around at bank boundaries.</li>
<li>BRK &lt;arg&gt; is assembled to opcode $05 rather than $00.</li>
<li>WDM is not supported.</li>
<li>Source file names must not have spaces in them on Windows.</li>
<li>Source file names may not have spaces in them on Windows.</li>
</ul>
<p>Quirks:</p>
@ -181,10 +181,11 @@ code, but also needs to know how to handle the corner cases.</p>
<li>Undocumented opcodes: SBX ($cb) uses the mnemonic AXS. All other
opcodes match up with the "unintended opcodes" document.</li>
<li>ca65 is implemented as a single-pass assembler, so label widths
can't always be known in time. For example, if you .ORG $0000 after
the point where the labels are used, the assembler will already have
generated them as absolute values. Width disambiguation must be applied
to instructions that aren't ambiguous to multi-pass assemblers.</li>
can't always be known in time. For example, if you use some zero-page
labels, but they're defined via .ORG $0000 after the point where the
labels are used, the assembler will already have generated them as
absolute values. Width disambiguation must be applied to operands
that wouldn't be ambiguous to a multi-pass assembler.</li>
<li>The assembler is geared toward generating relocatable code with
multiple segments (it is, after all, an assembler for a C compiler).
A linker script is expected to be provided for anything complex. Since
@ -201,8 +202,12 @@ code, but also needs to know how to handle the corner cases.</p>
<ul>
<li>PC relative branches don't wrap around at bank boundaries.</li>
<li>For some failures, an exit code of zero is returned.</li>
<li>Some indexed store instructions cause errors if the label isn't
unambiguously DP (e.g. `STX $00,X` vs. `STX $0000,X`).</li>
<li>Some DP indexed store instructions cause errors if the label isn't
unambiguously DP (e.g. <code>STX $00,Y</code> vs.
<code>STX $0000,Y</code>). This isn't a problem with project/platform
symbols, which are output as two-digit hex values when possible, but
causes failures when direct page locations are included in the project
and given labels.</li>
</ul>
<p>Quirks:</p>

View File

@ -0,0 +1,289 @@
;6502bench SourceGen v1.1.0-dev1
org $1000
sec
xce
jsr L101F
jsr L10AB
jsr L10F2
jsr L1106
jsr L1109
jsr L112C
jsr L11F9
jsr L11FC
nop
nop
nop
brk $80
L101F dfb $01,$80
cop $80
ora $80,S
tsb L0080
ora L0080
asl L0080
dfb $07,$80
php
ora #$80
asl A
phd
tsb: L0086
ora: L0086
asl: L0086
oral L0089
bpl L1041
L1041 ora (L0080),y
dfb $12,$80
ora ($80,S),y
trb L0080
ora L0080,x
asl L0080,x
ora [L0080],y
clc
ora L0086,y
inc A
tcs
trb: L0086
ora: L0086,x
asl: L0086,x
oral L0089,x
jsr L0086
dfb $21,$80
jsl L0089
and $80,S
bit L0080
and L0080
rol L0080
dfb $27,$80
plp
and #$80
rol A
pld
bit: L0086
and: L0086
rol: L0086
andl L0089
bmi L1089
L1089 and (L0080),y
dfb $32,$80
and ($80,S),y
bit L0080,x
and L0080,x
rol L0080,x
and [L0080],y
sec
and L0086,y
dec A
tsc
bit: L0086,x
and: L0086,x
rol: L0086,x
andl L0089,x
rti
L10AB dfb $41,$80
wdm $80
eor $80,S
mvp $84,$83
eor L0080
lsr L0080
dfb $47,$80
pha
eor #$80
lsr A
phk
jmp L10C2
L10C2 eor: L0086
lsr: L0086
eorl L0089
bvc L10CE
L10CE eor (L0080),y
dfb $52,$80
eor ($80,S),y
mvn $84,$83
eor L0080,x
lsr L0080,x
eor [L0080],y
cli
eor L0086,y
phy
tcd
jml L10E7
L10E7 eor: L0086,x
lsr: L0086,x
eorl L0089,x
rts
L10F2 dfb $61,$80
per $0ff6
adc $80,S
stz L0080
adc L0080
ror L0080
dfb $67,$80
pla
adc #$80
ror A
rtl
L1106 jmp (L0086)
L1109 adc: L0086
ror: L0086
adcl L0089
bvs L1115
L1115 adc (L0080),y
dfb $72,$80
adc ($80,S),y
stz L0080,x
adc L0080,x
ror L0080,x
adc [L0080],y
sei
adc L0086,y
ply
tdc
jmp (L0086,x)
L112C adc: L0086,x
ror: L0086,x
adcl L0089,x
bra L1138
L1138 dfb $81,$80
brl L113D
L113D sta $80,S
sty L0080
sta L0080
stx L0080
dfb $87,$80
dey
bit #$80
txa
phb
sty: L0086
sta: L0086
stx: L0086
stal L0089
bcc L115B
L115B sta (L0080),y
dfb $92,$80
sta ($80,S),y
dfb $94,$80
sta L0080,x
dfb $96,$80
sta [L0080],y
tya
sta L0086,y
txs
txy
stz: L0086
sta: L0086,x
stz: L0086,x
stal L0089,x
ldy #$80
dfb $a1,$80
ldx #$80
lda $80,S
ldy L0080
lda L0080
ldx L0080
dfb $a7,$80
tay
lda #$80
tax
plb
ldy: L0086
lda: L0086
ldx: L0086
ldal L0089
bcs L11A0
L11A0 lda (L0080),y
dfb $b2,$80
lda ($80,S),y
ldy L0080,x
lda L0080,x
ldx L0080,y
lda [L0080],y
clv
lda L0086,y
tsx
tyx
ldy: L0086,x
lda: L0086,x
ldx: L0086,y
ldal L0089,x
cpy #$80
dfb $c1,$80
rep #$00
cmp $80,S
cpy L0080
cmp L0080
dec L0080
dfb $c7,$80
iny
cmp #$80
dex
wai
cpy: L0086
cmp: L0086
dec: L0086
cmpl L0089
bne L11E5
L11E5 cmp (L0080),y
dfb $d2,$80
cmp ($80,S),y
dfb $d4,$80
cmp L0080,x
dec L0080,x
cmp [L0080],y
cld
cmp L0086,y
phx
stp
L11F9 jml [L0086]
L11FC cmp: L0086,x
dec: L0086,x
cmpl L0089,x
cpx #$80
dfb $e1,$80
sep #$00
sbc $80,S
cpx L0080
sbc L0080
inc L0080
dfb $e7,$80
inx
sbc #$80
nop
xba
cpx: L0086
sbc: L0086
inc: L0086
sbcl L0089
beq L122A
L122A sbc (L0080),y
dfb $f2,$80
sbc ($80,S),y
pea L0086
sbc L0080,x
inc L0080,x
sbc [L0080],y
sed
sbc L0086,y
plx
xce
jsr (L0086,x)
sbc: L0086,x
inc: L0086,x
sbcl L0089,x
org $0080
L0080 bit L0082
L0082 bit L0082
bit L0082
L0086 bit: L0086
L0089 ldal L0089

View File

@ -48,6 +48,18 @@ namespace SourceGen {
Constant // constant value
}
/// <summary>
/// True when this symbol is one of the internal label types (local-or-global,
/// global, or exported global address).  False for every other symbol type,
/// i.e. external addresses and constants.
/// </summary>
public bool IsInternalLabel {
    get {
        switch (SymbolType) {
            case Type.LocalOrGlobalAddr:
            case Type.GlobalAddr:
            case Type.GlobalAddrExport:
                return true;
            default:
                return false;
        }
    }
}
/// <summary>
/// Label sent to assembler.
@ -74,6 +86,7 @@ namespace SourceGen {
/// </summary>
public string SourceTypeString { get; private set; }
// No nullary constructor.
private Symbol() { }