1
0
mirror of https://github.com/fadden/6502bench.git synced 2025-01-16 19:32:31 +00:00

Correctly handle embedded instruction edge case

This began with a change to support "BRK <operand>" in cc65.  The
assembler only supports this for 65816 projects, so we detect that
and enable it when available.

While I was fiddling with some test code, an assertion fired.  This
revealed a minor issue in the code analyzer: when overwriting inline
data with instructions, we weren't resetting the format descriptor.

The code that exercises this edge case, which requires two-byte BRKs
and an inline BRK handler in an extension script, has been added to
test 2022-extension-scripts.

The new regression test revealed a flaw in the 64tass code
generator's character encoding scanner that caused it to hang.
Fixed.
This commit is contained in:
Andy McFadden 2019-10-19 16:23:42 -07:00
parent e5da5ced95
commit b6e571afc2
14 changed files with 134 additions and 35 deletions

View File

@ -23,10 +23,11 @@ namespace SourceGen {
/// <summary>
/// Analyzer attribute holder. Contains the output of the instruction and data analyzers.
/// Every byte in the input file has one of these associated with it.
///
/// </summary>
/// <remarks>
/// (Yes, it's a mutable struct. Yes, that fact has bitten me a few times. The array
/// of these may have millions of elements, so the reduction in overhead seems worthwhile.)
/// </summary>
/// </remarks>
public struct Anattrib {
[FlagsAttribute]
private enum AttribFlags {
@ -351,5 +352,22 @@ namespace SourceGen {
sb.Append(IsBranchTarget ? '>' : blank);
return sb.ToString();
}
/// <summary>
/// Generates a short summary of this entry, intended for debugging: a four-letter
/// category tag ("Inst", "Data", or "Inli"; nothing if none of those flags is set),
/// followed by "Start" when IsStart is set, followed by " len=" and the Length value.
/// </summary>
/// <returns>Summary string, e.g. "InstStart len=2".</returns>
public override string ToString() {
    // Only one category tag is emitted; the checks are made in priority order.
    string kind = IsInstruction ? "Inst" :
        IsData ? "Data" :
        IsInlineData ? "Inli" : string.Empty;

    StringBuilder text = new StringBuilder(kind);
    if (IsStart) {
        text.Append("Start");
    }
    text.Append(" len=").Append(Length);
    return text.ToString();
}
}
}

View File

@ -326,11 +326,20 @@ namespace SourceGen.AsmGen {
// IGenerator
public string ModifyOpcode(int offset, OpDef op) {
if (op == OpDef.OpBRK_StackInt || (op == OpDef.OpWDM_WDM && mAsmVersion < V2_18)) {
// cc65 v2.17 doesn't support WDM, and assembles BRK <arg> to opcode $05.
// cc65 v2.18 only supports two-byte BRK on 65816 code.
if (op == OpDef.OpBRK_StackInt) {
if (mAsmVersion < V2_18) {
// cc65 v2.17 assembles BRK <arg> to opcode $05
// https://github.com/cc65/cc65/issues/716
return null;
} else if (Project.CpuDef.Type != CpuDef.CpuType.Cpu65816) {
// cc65 v2.18 only supports BRK <arg> on 65816 (?!)
return null;
} else {
return string.Empty;
}
} else if (op == OpDef.OpWDM_WDM && mAsmVersion < V2_18) {
// cc65 v2.17 doesn't support WDM
// https://github.com/cc65/cc65/issues/715
// https://github.com/cc65/cc65/issues/716
return null;
} else if (op.IsUndocumented) {
if (sUndocMap.TryGetValue(op.Mnemonic, out string newValue)) {

View File

@ -309,7 +309,19 @@ namespace SourceGen.AsmGen {
return;
}
offset += attr.Length;
if (attr.IsInstructionStart) {
// look for embedded instructions, which might have formatted char data
int len;
for (len = 1; len < attr.Length; len++) {
if (Project.GetAnattrib(offset + len).IsInstructionStart) {
break;
}
}
offset += len;
} else {
// data items
offset += attr.Length;
}
}
}

View File

@ -534,9 +534,11 @@ namespace SourceGen {
if (mAnattribs[i].IsData) {
LogW(i, "Stripping mid-instruction data flag");
mAnattribs[i].IsData = false;
mAnattribs[i].DataDescriptor = null;
} else if (mAnattribs[i].IsInlineData) {
LogW(i, "Stripping mid-instruction inline-data flag");
mAnattribs[i].IsInlineData = false;
mAnattribs[i].DataDescriptor = null;
}
mAnattribs[i].IsInstruction = true;
}

View File

@ -607,7 +607,8 @@ namespace SourceGen {
/// </summary>
public void Validate() {
// Confirm that we can walk through the file, stepping directly from the start
// of one thing to the start of the next.
// of one thing to the start of the next. We won't normally do this, because
// we need to watch for embedded instructions.
int offset = 0;
while (offset < mFileData.Length) {
Anattrib attr = mAnattribs[offset];
@ -619,6 +620,9 @@ namespace SourceGen {
// Sometimes embedded instructions continue past the "outer" instruction,
// usually because we're misinterpreting the code. We need to deal with
// that here.
//
// One fun way to cause this is to have inline data from a plugin that got
// overwritten by the code analyzer. See test 2022 for an example.
int extraInstrBytes = 0;
while (offset < mFileData.Length && mAnattribs[offset].IsInstruction &&
!mAnattribs[offset].IsInstructionStart) {

View File

@ -34,7 +34,7 @@ namespace RuntimeData.Test2022 {
return;
}
byte func = mFileData[offset + 1];
if (func < 0x01 || func > 0x02) {
if (func != 0x85 && (func < 0x01 || func > 0x02)) {
return;
}
@ -91,6 +91,9 @@ namespace RuntimeData.Test2022 {
mAppRef.SetInlineDataFormat(nextOff, 8, DataType.StringGeneric,
DataSubType.HighAscii, null);
break;
case 0x85:
// do nothing further
break;
}
}
}

View File

@ -1,37 +1,37 @@
### 6502bench SourceGen dis65 v1.0 ###
{
"_ContentVersion":2,"FileDataLength":203,"FileDataCrc32":-1621468157,"ProjectProps":{
"_ContentVersion":2,"FileDataLength":213,"FileDataCrc32":-798098677,"ProjectProps":{
"CpuName":"65816","IncludeUndocumentedInstr":false,"TwoByteBrk":true,"EntryFlags":32702671,"AutoLabelStyle":"Simple","AnalysisParams":{
"AnalyzeUncategorizedData":true,"DefaultTextScanMode":"LowHighAscii","MinCharsForString":4,"SeekNearbyTargets":true,"SmartPlpHandling":true},
"PlatformSymbolFileIdentifiers":["PROJ:2022-extension-scripts.sym65"],"ExtensionScriptFileIdentifiers":["PROJ:2022-extension-scripts-a.cs","PROJ:2022-extension-scripts-b.cs"],"ProjectSyms":{
"PrintInlineDciString":{
"DataDescriptor":{
"Length":1,"Format":"NumericLE","SubFormat":"Hex","SymbolRef":null},
"Comment":"","HasWidth":false,"Label":"PrintInlineDciString","Value":77824,"Source":"Project","Type":"ExternalAddr"}}},
"Comment":"","HasWidth":false,"Direction":"ReadWrite","MultiMask":null,"Label":"PrintInlineDciString","Value":77824,"Source":"Project","Type":"ExternalAddr"}}},
"AddressMap":[{
"Offset":0,"Addr":4096},
{
"Offset":182,"Addr":4352}],"TypeHints":[{
"Offset":192,"Addr":4352}],"TypeHints":[{
"Low":0,"High":0,"Hint":"Code"}],"StatusFlagOverrides":{
},
"Comments":{
},
"181":"split across address change"},
"LongComments":{
},
"Notes":{
},
"UserLabels":{
"129":{
"139":{
"Label":"PrintInline8String","Value":4225,"Source":"User","Type":"LocalOrGlobalAddr"},
"130":{
"140":{
"Label":"PrintInlineRev8String","Value":4226,"Source":"User","Type":"LocalOrGlobalAddr"},
"131":{
"141":{
"Label":"PrintInlineNullString","Value":4227,"Source":"User","Type":"LocalOrGlobalAddr"},
"160":{
"170":{
"Label":"data02","Value":4256,"Source":"User","Type":"LocalOrGlobalAddr"},
"163":{
"173":{
"Label":"data03","Value":4259,"Source":"User","Type":"LocalOrGlobalAddr"},
"132":{
"142":{
"Label":"data01","Value":4228,"Source":"User","Type":"LocalOrGlobalAddr"}},
"OperandFormats":{
},

View File

@ -25,13 +25,20 @@ PrintInlineDciString = $013000
.text $14,$00,"string with length/2"
jsl PrintInlineDciString
.shift "DCI string"
jsr L10AB
jsr L10B5
jsr L110F
jsr L1108
brk #$01
.word data01
brk #$02
.word data02
nop
jsr L1085
.byte $24
L1085 .byte $a9
.byte $00
sta $ff
.byte $ea
rts
PrintInline8String rts
@ -62,8 +69,8 @@ data02 .word data03
.enc sg_hiascii
data03 .text "AllEight"
L10AB jsr PrintInlineNullString
per $8023
L10B5 jsr PrintInlineNullString ;split across address change
per $802d
rtl
.byte $65

View File

@ -18,13 +18,20 @@ PrintInlineDciString equ $013000
strl 'string with length/2'
jsl PrintInlineDciString
dci 'DCI string'
jsr L10AB
jsr L10B5
jsr L110F
jsr L1108
brk $01
dw data01
brk $02
dw data02
nop
jsr L1085
dfb $24
L1085 dfb $a9
dfb $00
sta $ff
dfb $ea
rts
PrintInline8String rts
@ -54,8 +61,8 @@ data02 dw data03
dfb $80
data03 asc "AllEight"
L10AB jsr PrintInlineNullString
per $8023
L10B5 jsr PrintInlineNullString ;split across address change
per $802d
rtl
dfb $65

View File

@ -21,13 +21,20 @@ PrintInlineDciString = $013000
!text $14,$00,"string with length/2"
jsl PrintInlineDciString
!text "DCI strin",$e7
jsr L10AB
jsr L10B5
jsr L110F
jsr L1108
!byte $00,$01
!word data01
!byte $00,$02
!word data02
nop
jsr L1085
!byte $24
L1085 !byte $a9
!byte $00
sta $ff
!byte $ea
rts
PrintInline8String rts
@ -59,8 +66,8 @@ data02 !word data03
data03 !text "AllEight"
}
L10AB jsr PrintInlineNullString
per $8023
L10B5 jsr PrintInlineNullString ;split across address change
per $802d
rtl
!byte $65

View File

@ -22,13 +22,20 @@ PrintInlineDciString = $013000
.byte $14,$00,"string with length/2"
jsl PrintInlineDciString
.byte "DCI strin",$e7
jsr L10AB
jsr L10B5
jsr L110F
jsr L1108
.byte $00,$01
brk $01
.word data01
.byte $00,$02
brk $02
.word data02
nop
jsr L1085
.byte $24
L1085: .byte $a9
.byte $00
sta $ff
.byte $ea
rts
PrintInline8String: rts
@ -63,8 +70,8 @@ data02: .word data03
.endmacro
data03: HiAscii "AllEight"
L10AB: jsr PrintInlineNullString
per $8023
L10B5: jsr PrintInlineNullString ;split across address change
per $802d
rtl
.byte $65
@ -79,7 +86,7 @@ L10AB: jsr PrintInlineNullString
L1108: jsl PrintInlineL2String
asl A
.byte $00,$60
brk $60
L110F: jsr PrintInlineNullString
adc $6e

View File

@ -1,7 +1,7 @@
# 6502bench SourceGen generated linker script for 2022-extension-scripts
MEMORY {
MAIN: file=%O, start=%S, size=65536;
# MEM000: file=%O, start=$1000, size=182;
# MEM000: file=%O, start=$1000, size=192;
# MEM001: file=%O, start=$1100, size=21;
}
SEGMENTS {

View File

@ -43,6 +43,29 @@ PrintInlineDciString equ $013000 ;EDIT: add to project symbols
brk $02
dw data02
; Handle an edge case where the inline formatting gets thrown out.
; Two paths: BIT $A9 / BRK $85 / inline $FF,$EA is analyzed first, then
; LDA #$00 / STA $FF / NOP is analyzed.  When we get to the STA we notice
; that it's marked as inline data, so we remove that marking from $85 $ff
; but not from $ea.
;
; If we try to walk through the file, advancing offset by the anattrib
; length, we will traverse the first path, which (with 2-byte BRKs)
; runs into the $FF, which is marked as an instruction but not an
; instruction start.
;
; Switching to 1-byte BRKs makes the $85 an inline data item rather
; than an instruction.  When we come back through, we LDA #$00 and
; then skip over the next 3 bytes.  No conflict.
;
; In the comments below, "1:" and "2:" identify which of the two paths
; interprets the byte that way.
nop
jsr edge1 ;alt path, evaluated later
dfb $24 ;1: BIT dp
edge1 dfb $a9 ;2: LDA imm
brk ;1: BRK <op>
dfb $85 ;2: STA dp (1: operand of the two-byte BRK)
dfb $ff ;1: address $eaff
nop ;2: NOP (1: high byte of address $eaff)
rts
PrintInline8String rts ;EDIT: set label