1
0
mirror of https://github.com/fadden/6502bench.git synced 2025-08-13 15:25:45 +00:00

Correctly handle embedded instruction edge case

This began with a change to support "BRK <operand>" in cc65.  The
assembler only supports this for 65816 projects, so we detect that
and enable it when available.

While fiddling with some test code an assertion fired.  This
revealed a minor issue in the code analyzer: when overwriting inline
data with instructions, we weren't resetting the format descriptor.

The code that exercises it, which requires two-byte BRKs and an
inline BRK handler in an extension script, has been added to test
2022-extension-scripts.

The new regression test revealed a flaw in the 64tass code
generator's character encoding scanner that caused it to hang.
Fixed.
This commit is contained in:
Andy McFadden
2019-10-19 16:23:42 -07:00
parent e5da5ced95
commit b6e571afc2
14 changed files with 134 additions and 35 deletions

View File

@@ -23,10 +23,11 @@ namespace SourceGen {
/// <summary> /// <summary>
/// Analyzer attribute holder. Contains the output of the instruction and data analyzers. /// Analyzer attribute holder. Contains the output of the instruction and data analyzers.
/// Every byte in the input file has one of these associated with it. /// Every byte in the input file has one of these associated with it.
/// /// </summary>
/// <remarks>
/// (Yes, it's a mutable struct. Yes, that fact has bitten me a few times. The array /// (Yes, it's a mutable struct. Yes, that fact has bitten me a few times. The array
/// of these may have millions of elements, so the reduction in overhead seems worthwhile.) /// of these may have millions of elements, so the reduction in overhead seems worthwhile.)
/// </summary> /// </remarks>
public struct Anattrib { public struct Anattrib {
[FlagsAttribute] [FlagsAttribute]
private enum AttribFlags { private enum AttribFlags {
@@ -351,5 +352,22 @@ namespace SourceGen {
sb.Append(IsBranchTarget ? '>' : blank); sb.Append(IsBranchTarget ? '>' : blank);
return sb.ToString(); return sb.ToString();
} }
/// <summary>
/// Returns a compact text form of this attribute: a four-letter type tag ("Inst",
/// "Data", or "Inli"; nothing if none of those flags is set), then "Start" when
/// IsStart is set, then " len=" followed by the Length value.
/// </summary>
public override string ToString() {
    // The flags are tested in priority order; only the first match contributes a tag.
    string kindTag = IsInstruction ? "Inst"
        : IsData ? "Data"
        : IsInlineData ? "Inli"
        : string.Empty;
    string startTag = IsStart ? "Start" : string.Empty;

    StringBuilder text = new StringBuilder(kindTag);
    text.Append(startTag).Append(" len=").Append(Length);
    return text.ToString();
}
} }
} }

View File

@@ -326,11 +326,20 @@ namespace SourceGen.AsmGen {
// IGenerator // IGenerator
public string ModifyOpcode(int offset, OpDef op) { public string ModifyOpcode(int offset, OpDef op) {
if (op == OpDef.OpBRK_StackInt || (op == OpDef.OpWDM_WDM && mAsmVersion < V2_18)) { if (op == OpDef.OpBRK_StackInt) {
// cc65 v2.17 doesn't support WDM, and assembles BRK <arg> to opcode $05. if (mAsmVersion < V2_18) {
// cc65 v2.18 only supports two-byte BRK on 65816 code. // cc65 v2.17 assembles BRK <arg> to opcode $05
// https://github.com/cc65/cc65/issues/716
return null;
} else if (Project.CpuDef.Type != CpuDef.CpuType.Cpu65816) {
// cc65 v2.18 only supports BRK <arg> on 65816 (?!)
return null;
} else {
return string.Empty;
}
} else if (op == OpDef.OpWDM_WDM && mAsmVersion < V2_18) {
// cc65 v2.17 doesn't support WDM
// https://github.com/cc65/cc65/issues/715 // https://github.com/cc65/cc65/issues/715
// https://github.com/cc65/cc65/issues/716
return null; return null;
} else if (op.IsUndocumented) { } else if (op.IsUndocumented) {
if (sUndocMap.TryGetValue(op.Mnemonic, out string newValue)) { if (sUndocMap.TryGetValue(op.Mnemonic, out string newValue)) {

View File

@@ -309,7 +309,19 @@ namespace SourceGen.AsmGen {
return; return;
} }
offset += attr.Length; if (attr.IsInstructionStart) {
// look for embedded instructions, which might have formatted char data
int len;
for (len = 1; len < attr.Length; len++) {
if (Project.GetAnattrib(offset + len).IsInstructionStart) {
break;
}
}
offset += len;
} else {
// data items
offset += attr.Length;
}
} }
} }

View File

@@ -534,9 +534,11 @@ namespace SourceGen {
if (mAnattribs[i].IsData) { if (mAnattribs[i].IsData) {
LogW(i, "Stripping mid-instruction data flag"); LogW(i, "Stripping mid-instruction data flag");
mAnattribs[i].IsData = false; mAnattribs[i].IsData = false;
mAnattribs[i].DataDescriptor = null;
} else if (mAnattribs[i].IsInlineData) { } else if (mAnattribs[i].IsInlineData) {
LogW(i, "Stripping mid-instruction inline-data flag"); LogW(i, "Stripping mid-instruction inline-data flag");
mAnattribs[i].IsInlineData = false; mAnattribs[i].IsInlineData = false;
mAnattribs[i].DataDescriptor = null;
} }
mAnattribs[i].IsInstruction = true; mAnattribs[i].IsInstruction = true;
} }

View File

@@ -607,7 +607,8 @@ namespace SourceGen {
/// </summary> /// </summary>
public void Validate() { public void Validate() {
// Confirm that we can walk through the file, stepping directly from the start // Confirm that we can walk through the file, stepping directly from the start
// of one thing to the start of the next. // of one thing to the start of the next. We won't normally do this, because
// we need to watch for embedded instructions.
int offset = 0; int offset = 0;
while (offset < mFileData.Length) { while (offset < mFileData.Length) {
Anattrib attr = mAnattribs[offset]; Anattrib attr = mAnattribs[offset];
@@ -619,6 +620,9 @@ namespace SourceGen {
// Sometimes embedded instructions continue past the "outer" instruction, // Sometimes embedded instructions continue past the "outer" instruction,
// usually because we're misinterpreting the code. We need to deal with // usually because we're misinterpreting the code. We need to deal with
// that here. // that here.
//
// One fun way to cause this is to have inline data from a plugin that got
// overwritten by the code analyzer. See test 2022 for an example.
int extraInstrBytes = 0; int extraInstrBytes = 0;
while (offset < mFileData.Length && mAnattribs[offset].IsInstruction && while (offset < mFileData.Length && mAnattribs[offset].IsInstruction &&
!mAnattribs[offset].IsInstructionStart) { !mAnattribs[offset].IsInstructionStart) {

View File

@@ -34,7 +34,7 @@ namespace RuntimeData.Test2022 {
return; return;
} }
byte func = mFileData[offset + 1]; byte func = mFileData[offset + 1];
if (func < 0x01 || func > 0x02) { if (func != 0x85 && (func < 0x01 || func > 0x02)) {
return; return;
} }
@@ -91,6 +91,9 @@ namespace RuntimeData.Test2022 {
mAppRef.SetInlineDataFormat(nextOff, 8, DataType.StringGeneric, mAppRef.SetInlineDataFormat(nextOff, 8, DataType.StringGeneric,
DataSubType.HighAscii, null); DataSubType.HighAscii, null);
break; break;
case 0x85:
// do nothing further
break;
} }
} }
} }

View File

@@ -1,37 +1,37 @@
### 6502bench SourceGen dis65 v1.0 ### ### 6502bench SourceGen dis65 v1.0 ###
{ {
"_ContentVersion":2,"FileDataLength":203,"FileDataCrc32":-1621468157,"ProjectProps":{ "_ContentVersion":2,"FileDataLength":213,"FileDataCrc32":-798098677,"ProjectProps":{
"CpuName":"65816","IncludeUndocumentedInstr":false,"TwoByteBrk":true,"EntryFlags":32702671,"AutoLabelStyle":"Simple","AnalysisParams":{ "CpuName":"65816","IncludeUndocumentedInstr":false,"TwoByteBrk":true,"EntryFlags":32702671,"AutoLabelStyle":"Simple","AnalysisParams":{
"AnalyzeUncategorizedData":true,"DefaultTextScanMode":"LowHighAscii","MinCharsForString":4,"SeekNearbyTargets":true,"SmartPlpHandling":true}, "AnalyzeUncategorizedData":true,"DefaultTextScanMode":"LowHighAscii","MinCharsForString":4,"SeekNearbyTargets":true,"SmartPlpHandling":true},
"PlatformSymbolFileIdentifiers":["PROJ:2022-extension-scripts.sym65"],"ExtensionScriptFileIdentifiers":["PROJ:2022-extension-scripts-a.cs","PROJ:2022-extension-scripts-b.cs"],"ProjectSyms":{ "PlatformSymbolFileIdentifiers":["PROJ:2022-extension-scripts.sym65"],"ExtensionScriptFileIdentifiers":["PROJ:2022-extension-scripts-a.cs","PROJ:2022-extension-scripts-b.cs"],"ProjectSyms":{
"PrintInlineDciString":{ "PrintInlineDciString":{
"DataDescriptor":{ "DataDescriptor":{
"Length":1,"Format":"NumericLE","SubFormat":"Hex","SymbolRef":null}, "Length":1,"Format":"NumericLE","SubFormat":"Hex","SymbolRef":null},
"Comment":"","HasWidth":false,"Label":"PrintInlineDciString","Value":77824,"Source":"Project","Type":"ExternalAddr"}}}, "Comment":"","HasWidth":false,"Direction":"ReadWrite","MultiMask":null,"Label":"PrintInlineDciString","Value":77824,"Source":"Project","Type":"ExternalAddr"}}},
"AddressMap":[{ "AddressMap":[{
"Offset":0,"Addr":4096}, "Offset":0,"Addr":4096},
{ {
"Offset":182,"Addr":4352}],"TypeHints":[{ "Offset":192,"Addr":4352}],"TypeHints":[{
"Low":0,"High":0,"Hint":"Code"}],"StatusFlagOverrides":{ "Low":0,"High":0,"Hint":"Code"}],"StatusFlagOverrides":{
}, },
"Comments":{ "Comments":{
}, "181":"split across address change"},
"LongComments":{ "LongComments":{
}, },
"Notes":{ "Notes":{
}, },
"UserLabels":{ "UserLabels":{
"129":{ "139":{
"Label":"PrintInline8String","Value":4225,"Source":"User","Type":"LocalOrGlobalAddr"}, "Label":"PrintInline8String","Value":4225,"Source":"User","Type":"LocalOrGlobalAddr"},
"130":{ "140":{
"Label":"PrintInlineRev8String","Value":4226,"Source":"User","Type":"LocalOrGlobalAddr"}, "Label":"PrintInlineRev8String","Value":4226,"Source":"User","Type":"LocalOrGlobalAddr"},
"131":{ "141":{
"Label":"PrintInlineNullString","Value":4227,"Source":"User","Type":"LocalOrGlobalAddr"}, "Label":"PrintInlineNullString","Value":4227,"Source":"User","Type":"LocalOrGlobalAddr"},
"160":{ "170":{
"Label":"data02","Value":4256,"Source":"User","Type":"LocalOrGlobalAddr"}, "Label":"data02","Value":4256,"Source":"User","Type":"LocalOrGlobalAddr"},
"163":{ "173":{
"Label":"data03","Value":4259,"Source":"User","Type":"LocalOrGlobalAddr"}, "Label":"data03","Value":4259,"Source":"User","Type":"LocalOrGlobalAddr"},
"132":{ "142":{
"Label":"data01","Value":4228,"Source":"User","Type":"LocalOrGlobalAddr"}}, "Label":"data01","Value":4228,"Source":"User","Type":"LocalOrGlobalAddr"}},
"OperandFormats":{ "OperandFormats":{
}, },

View File

@@ -25,13 +25,20 @@ PrintInlineDciString = $013000
.text $14,$00,"string with length/2" .text $14,$00,"string with length/2"
jsl PrintInlineDciString jsl PrintInlineDciString
.shift "DCI string" .shift "DCI string"
jsr L10AB jsr L10B5
jsr L110F jsr L110F
jsr L1108 jsr L1108
brk #$01 brk #$01
.word data01 .word data01
brk #$02 brk #$02
.word data02 .word data02
nop
jsr L1085
.byte $24
L1085 .byte $a9
.byte $00
sta $ff
.byte $ea
rts rts
PrintInline8String rts PrintInline8String rts
@@ -62,8 +69,8 @@ data02 .word data03
.enc sg_hiascii .enc sg_hiascii
data03 .text "AllEight" data03 .text "AllEight"
L10AB jsr PrintInlineNullString L10B5 jsr PrintInlineNullString ;split across address change
per $8023 per $802d
rtl rtl
.byte $65 .byte $65

View File

@@ -18,13 +18,20 @@ PrintInlineDciString equ $013000
strl 'string with length/2' strl 'string with length/2'
jsl PrintInlineDciString jsl PrintInlineDciString
dci 'DCI string' dci 'DCI string'
jsr L10AB jsr L10B5
jsr L110F jsr L110F
jsr L1108 jsr L1108
brk $01 brk $01
dw data01 dw data01
brk $02 brk $02
dw data02 dw data02
nop
jsr L1085
dfb $24
L1085 dfb $a9
dfb $00
sta $ff
dfb $ea
rts rts
PrintInline8String rts PrintInline8String rts
@@ -54,8 +61,8 @@ data02 dw data03
dfb $80 dfb $80
data03 asc "AllEight" data03 asc "AllEight"
L10AB jsr PrintInlineNullString L10B5 jsr PrintInlineNullString ;split across address change
per $8023 per $802d
rtl rtl
dfb $65 dfb $65

View File

@@ -21,13 +21,20 @@ PrintInlineDciString = $013000
!text $14,$00,"string with length/2" !text $14,$00,"string with length/2"
jsl PrintInlineDciString jsl PrintInlineDciString
!text "DCI strin",$e7 !text "DCI strin",$e7
jsr L10AB jsr L10B5
jsr L110F jsr L110F
jsr L1108 jsr L1108
!byte $00,$01 !byte $00,$01
!word data01 !word data01
!byte $00,$02 !byte $00,$02
!word data02 !word data02
nop
jsr L1085
!byte $24
L1085 !byte $a9
!byte $00
sta $ff
!byte $ea
rts rts
PrintInline8String rts PrintInline8String rts
@@ -59,8 +66,8 @@ data02 !word data03
data03 !text "AllEight" data03 !text "AllEight"
} }
L10AB jsr PrintInlineNullString L10B5 jsr PrintInlineNullString ;split across address change
per $8023 per $802d
rtl rtl
!byte $65 !byte $65

View File

@@ -22,13 +22,20 @@ PrintInlineDciString = $013000
.byte $14,$00,"string with length/2" .byte $14,$00,"string with length/2"
jsl PrintInlineDciString jsl PrintInlineDciString
.byte "DCI strin",$e7 .byte "DCI strin",$e7
jsr L10AB jsr L10B5
jsr L110F jsr L110F
jsr L1108 jsr L1108
.byte $00,$01 brk $01
.word data01 .word data01
.byte $00,$02 brk $02
.word data02 .word data02
nop
jsr L1085
.byte $24
L1085: .byte $a9
.byte $00
sta $ff
.byte $ea
rts rts
PrintInline8String: rts PrintInline8String: rts
@@ -63,8 +70,8 @@ data02: .word data03
.endmacro .endmacro
data03: HiAscii "AllEight" data03: HiAscii "AllEight"
L10AB: jsr PrintInlineNullString L10B5: jsr PrintInlineNullString ;split across address change
per $8023 per $802d
rtl rtl
.byte $65 .byte $65
@@ -79,7 +86,7 @@ L10AB: jsr PrintInlineNullString
L1108: jsl PrintInlineL2String L1108: jsl PrintInlineL2String
asl A asl A
.byte $00,$60 brk $60
L110F: jsr PrintInlineNullString L110F: jsr PrintInlineNullString
adc $6e adc $6e

View File

@@ -1,7 +1,7 @@
# 6502bench SourceGen generated linker script for 2022-extension-scripts # 6502bench SourceGen generated linker script for 2022-extension-scripts
MEMORY { MEMORY {
MAIN: file=%O, start=%S, size=65536; MAIN: file=%O, start=%S, size=65536;
# MEM000: file=%O, start=$1000, size=182; # MEM000: file=%O, start=$1000, size=192;
# MEM001: file=%O, start=$1100, size=21; # MEM001: file=%O, start=$1100, size=21;
} }
SEGMENTS { SEGMENTS {

View File

@@ -43,6 +43,29 @@ PrintInlineDciString equ $013000 ;EDIT: add to project symbols
brk $02 brk $02
dw data02 dw data02
; Handle an edge case where the inline formatting gets thrown out.
; Two paths: BIT $A9 / BRK $85 / inline $FF $EA is analyzed first, then
; LDA #$00 / STA $FF / NOP is analyzed. When we get to the STA we notice
; that it's marked as inline data, so we remove it from $85 $ff
; but not from $ea.
;
; If we try to walk through the file, advancing offset by the anattrib
; length, we will traverse the first path, which (with 2-byte BRKs)
; runs into the $FF, which is marked as an instruction but not an
; instruction start.
;
; Switching to 1-byte BRKs makes the $85 an inline data item rather
; than an instruction. When we come back through, we LDA #$00 and
; then skip over the next 3 bytes. No conflict.
;
; In the trailing comments, "1:" and "2:" identify which of the two
; paths decodes the byte as shown.
nop
jsr edge1 ;alt path, evaluated later
dfb $24 ;1: BIT dp
edge1 dfb $a9 ;2: LDA imm
brk ;1: BRK <op>
dfb $85 ;2: STA dp
dfb $ff ;1: address $eaff
nop ;2: NOP
rts rts
PrintInline8String rts ;EDIT: set label PrintInline8String rts ;EDIT: set label