mirror of
https://github.com/fadden/6502bench.git
synced 2025-08-13 15:25:45 +00:00
Correctly handle embedded instruction edge case
This began with a change to support "BRK <operand>" in cc65. The assembler only supports this for 65816 projects, so we detect that and enable it when available. While fiddling with some test code an assertion fired. This revealed a minor issue in the code analyzer: when overwriting inline data with instructions, we weren't resetting the format descriptor. The code that exercises it, which requires two-byte BRKs and an inline BRK handler in an extension script, has been added to test 2022-extension-scripts. The new regression test revealed a flaw in the 64tass code generator's character encoding scanner that caused it to hang. Fixed.
This commit is contained in:
@@ -23,10 +23,11 @@ namespace SourceGen {
|
|||||||
/// <summary>
|
/// <summary>
|
||||||
/// Analyzer attribute holder. Contains the output of the instruction and data analyzers.
|
/// Analyzer attribute holder. Contains the output of the instruction and data analyzers.
|
||||||
/// Every byte in the input file has one of these associated with it.
|
/// Every byte in the input file has one of these associated with it.
|
||||||
///
|
/// </summary>
|
||||||
|
/// <remarks>
|
||||||
/// (Yes, it's a mutable struct. Yes, that fact has bitten me a few times. The array
|
/// (Yes, it's a mutable struct. Yes, that fact has bitten me a few times. The array
|
||||||
/// of these may have millions of elements, so the reduction in overhead seems worthwhile.)
|
/// of these may have millions of elements, so the reduction in overhead seems worthwhile.)
|
||||||
/// </summary>
|
/// </remarks>
|
||||||
public struct Anattrib {
|
public struct Anattrib {
|
||||||
[FlagsAttribute]
|
[FlagsAttribute]
|
||||||
private enum AttribFlags {
|
private enum AttribFlags {
|
||||||
@@ -351,5 +352,22 @@ namespace SourceGen {
|
|||||||
sb.Append(IsBranchTarget ? '>' : blank);
|
sb.Append(IsBranchTarget ? '>' : blank);
|
||||||
return sb.ToString();
|
return sb.ToString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public override string ToString() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
if (IsInstruction) {
|
||||||
|
sb.Append("Inst");
|
||||||
|
} else if (IsData) {
|
||||||
|
sb.Append("Data");
|
||||||
|
} else if (IsInlineData) {
|
||||||
|
sb.Append("Inli");
|
||||||
|
}
|
||||||
|
if (IsStart) {
|
||||||
|
sb.Append("Start");
|
||||||
|
}
|
||||||
|
sb.Append(" len=");
|
||||||
|
sb.Append(Length);
|
||||||
|
return sb.ToString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -326,11 +326,20 @@ namespace SourceGen.AsmGen {
|
|||||||
|
|
||||||
// IGenerator
|
// IGenerator
|
||||||
public string ModifyOpcode(int offset, OpDef op) {
|
public string ModifyOpcode(int offset, OpDef op) {
|
||||||
if (op == OpDef.OpBRK_StackInt || (op == OpDef.OpWDM_WDM && mAsmVersion < V2_18)) {
|
if (op == OpDef.OpBRK_StackInt) {
|
||||||
// cc65 v2.17 doesn't support WDM, and assembles BRK <arg> to opcode $05.
|
if (mAsmVersion < V2_18) {
|
||||||
// cc65 v2.18 only supports two-byte BRK on 65816 code.
|
// cc65 v2.17 assembles BRK <arg> to opcode $05
|
||||||
|
// https://github.com/cc65/cc65/issues/716
|
||||||
|
return null;
|
||||||
|
} else if (Project.CpuDef.Type != CpuDef.CpuType.Cpu65816) {
|
||||||
|
// cc65 v2.18 only supports BRK <arg> on 65816 (?!)
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
return string.Empty;
|
||||||
|
}
|
||||||
|
} else if (op == OpDef.OpWDM_WDM && mAsmVersion < V2_18) {
|
||||||
|
// cc65 v2.17 doesn't support WDM
|
||||||
// https://github.com/cc65/cc65/issues/715
|
// https://github.com/cc65/cc65/issues/715
|
||||||
// https://github.com/cc65/cc65/issues/716
|
|
||||||
return null;
|
return null;
|
||||||
} else if (op.IsUndocumented) {
|
} else if (op.IsUndocumented) {
|
||||||
if (sUndocMap.TryGetValue(op.Mnemonic, out string newValue)) {
|
if (sUndocMap.TryGetValue(op.Mnemonic, out string newValue)) {
|
||||||
|
@@ -309,7 +309,19 @@ namespace SourceGen.AsmGen {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
offset += attr.Length;
|
if (attr.IsInstructionStart) {
|
||||||
|
// look for embedded instructions, which might have formatted char data
|
||||||
|
int len;
|
||||||
|
for (len = 1; len < attr.Length; len++) {
|
||||||
|
if (Project.GetAnattrib(offset + len).IsInstructionStart) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
offset += len;
|
||||||
|
} else {
|
||||||
|
// data items
|
||||||
|
offset += attr.Length;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -534,9 +534,11 @@ namespace SourceGen {
|
|||||||
if (mAnattribs[i].IsData) {
|
if (mAnattribs[i].IsData) {
|
||||||
LogW(i, "Stripping mid-instruction data flag");
|
LogW(i, "Stripping mid-instruction data flag");
|
||||||
mAnattribs[i].IsData = false;
|
mAnattribs[i].IsData = false;
|
||||||
|
mAnattribs[i].DataDescriptor = null;
|
||||||
} else if (mAnattribs[i].IsInlineData) {
|
} else if (mAnattribs[i].IsInlineData) {
|
||||||
LogW(i, "Stripping mid-instruction inline-data flag");
|
LogW(i, "Stripping mid-instruction inline-data flag");
|
||||||
mAnattribs[i].IsInlineData = false;
|
mAnattribs[i].IsInlineData = false;
|
||||||
|
mAnattribs[i].DataDescriptor = null;
|
||||||
}
|
}
|
||||||
mAnattribs[i].IsInstruction = true;
|
mAnattribs[i].IsInstruction = true;
|
||||||
}
|
}
|
||||||
|
@@ -607,7 +607,8 @@ namespace SourceGen {
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
public void Validate() {
|
public void Validate() {
|
||||||
// Confirm that we can walk through the file, stepping directly from the start
|
// Confirm that we can walk through the file, stepping directly from the start
|
||||||
// of one thing to the start of the next.
|
// of one thing to the start of the next. We won't normally do this, because
|
||||||
|
// we need to watch for embedded instructions.
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
while (offset < mFileData.Length) {
|
while (offset < mFileData.Length) {
|
||||||
Anattrib attr = mAnattribs[offset];
|
Anattrib attr = mAnattribs[offset];
|
||||||
@@ -619,6 +620,9 @@ namespace SourceGen {
|
|||||||
// Sometimes embedded instructions continue past the "outer" instruction,
|
// Sometimes embedded instructions continue past the "outer" instruction,
|
||||||
// usually because we're misinterpreting the code. We need to deal with
|
// usually because we're misinterpreting the code. We need to deal with
|
||||||
// that here.
|
// that here.
|
||||||
|
//
|
||||||
|
// One fun way to cause this is to have inline data from a plugin that got
|
||||||
|
// overwritten by the code analyzer. See test 2022 for an example.
|
||||||
int extraInstrBytes = 0;
|
int extraInstrBytes = 0;
|
||||||
while (offset < mFileData.Length && mAnattribs[offset].IsInstruction &&
|
while (offset < mFileData.Length && mAnattribs[offset].IsInstruction &&
|
||||||
!mAnattribs[offset].IsInstructionStart) {
|
!mAnattribs[offset].IsInstructionStart) {
|
||||||
|
Binary file not shown.
@@ -34,7 +34,7 @@ namespace RuntimeData.Test2022 {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
byte func = mFileData[offset + 1];
|
byte func = mFileData[offset + 1];
|
||||||
if (func < 0x01 || func > 0x02) {
|
if (func != 0x85 && (func < 0x01 || func > 0x02)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -91,6 +91,9 @@ namespace RuntimeData.Test2022 {
|
|||||||
mAppRef.SetInlineDataFormat(nextOff, 8, DataType.StringGeneric,
|
mAppRef.SetInlineDataFormat(nextOff, 8, DataType.StringGeneric,
|
||||||
DataSubType.HighAscii, null);
|
DataSubType.HighAscii, null);
|
||||||
break;
|
break;
|
||||||
|
case 0x85:
|
||||||
|
// do nothing further
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -1,37 +1,37 @@
|
|||||||
### 6502bench SourceGen dis65 v1.0 ###
|
### 6502bench SourceGen dis65 v1.0 ###
|
||||||
{
|
{
|
||||||
"_ContentVersion":2,"FileDataLength":203,"FileDataCrc32":-1621468157,"ProjectProps":{
|
"_ContentVersion":2,"FileDataLength":213,"FileDataCrc32":-798098677,"ProjectProps":{
|
||||||
"CpuName":"65816","IncludeUndocumentedInstr":false,"TwoByteBrk":true,"EntryFlags":32702671,"AutoLabelStyle":"Simple","AnalysisParams":{
|
"CpuName":"65816","IncludeUndocumentedInstr":false,"TwoByteBrk":true,"EntryFlags":32702671,"AutoLabelStyle":"Simple","AnalysisParams":{
|
||||||
"AnalyzeUncategorizedData":true,"DefaultTextScanMode":"LowHighAscii","MinCharsForString":4,"SeekNearbyTargets":true,"SmartPlpHandling":true},
|
"AnalyzeUncategorizedData":true,"DefaultTextScanMode":"LowHighAscii","MinCharsForString":4,"SeekNearbyTargets":true,"SmartPlpHandling":true},
|
||||||
"PlatformSymbolFileIdentifiers":["PROJ:2022-extension-scripts.sym65"],"ExtensionScriptFileIdentifiers":["PROJ:2022-extension-scripts-a.cs","PROJ:2022-extension-scripts-b.cs"],"ProjectSyms":{
|
"PlatformSymbolFileIdentifiers":["PROJ:2022-extension-scripts.sym65"],"ExtensionScriptFileIdentifiers":["PROJ:2022-extension-scripts-a.cs","PROJ:2022-extension-scripts-b.cs"],"ProjectSyms":{
|
||||||
"PrintInlineDciString":{
|
"PrintInlineDciString":{
|
||||||
"DataDescriptor":{
|
"DataDescriptor":{
|
||||||
"Length":1,"Format":"NumericLE","SubFormat":"Hex","SymbolRef":null},
|
"Length":1,"Format":"NumericLE","SubFormat":"Hex","SymbolRef":null},
|
||||||
"Comment":"","HasWidth":false,"Label":"PrintInlineDciString","Value":77824,"Source":"Project","Type":"ExternalAddr"}}},
|
"Comment":"","HasWidth":false,"Direction":"ReadWrite","MultiMask":null,"Label":"PrintInlineDciString","Value":77824,"Source":"Project","Type":"ExternalAddr"}}},
|
||||||
"AddressMap":[{
|
"AddressMap":[{
|
||||||
"Offset":0,"Addr":4096},
|
"Offset":0,"Addr":4096},
|
||||||
{
|
{
|
||||||
"Offset":182,"Addr":4352}],"TypeHints":[{
|
"Offset":192,"Addr":4352}],"TypeHints":[{
|
||||||
"Low":0,"High":0,"Hint":"Code"}],"StatusFlagOverrides":{
|
"Low":0,"High":0,"Hint":"Code"}],"StatusFlagOverrides":{
|
||||||
},
|
},
|
||||||
"Comments":{
|
"Comments":{
|
||||||
},
|
"181":"split across address change"},
|
||||||
"LongComments":{
|
"LongComments":{
|
||||||
},
|
},
|
||||||
"Notes":{
|
"Notes":{
|
||||||
},
|
},
|
||||||
"UserLabels":{
|
"UserLabels":{
|
||||||
"129":{
|
"139":{
|
||||||
"Label":"PrintInline8String","Value":4225,"Source":"User","Type":"LocalOrGlobalAddr"},
|
"Label":"PrintInline8String","Value":4225,"Source":"User","Type":"LocalOrGlobalAddr"},
|
||||||
"130":{
|
"140":{
|
||||||
"Label":"PrintInlineRev8String","Value":4226,"Source":"User","Type":"LocalOrGlobalAddr"},
|
"Label":"PrintInlineRev8String","Value":4226,"Source":"User","Type":"LocalOrGlobalAddr"},
|
||||||
"131":{
|
"141":{
|
||||||
"Label":"PrintInlineNullString","Value":4227,"Source":"User","Type":"LocalOrGlobalAddr"},
|
"Label":"PrintInlineNullString","Value":4227,"Source":"User","Type":"LocalOrGlobalAddr"},
|
||||||
"160":{
|
"170":{
|
||||||
"Label":"data02","Value":4256,"Source":"User","Type":"LocalOrGlobalAddr"},
|
"Label":"data02","Value":4256,"Source":"User","Type":"LocalOrGlobalAddr"},
|
||||||
"163":{
|
"173":{
|
||||||
"Label":"data03","Value":4259,"Source":"User","Type":"LocalOrGlobalAddr"},
|
"Label":"data03","Value":4259,"Source":"User","Type":"LocalOrGlobalAddr"},
|
||||||
"132":{
|
"142":{
|
||||||
"Label":"data01","Value":4228,"Source":"User","Type":"LocalOrGlobalAddr"}},
|
"Label":"data01","Value":4228,"Source":"User","Type":"LocalOrGlobalAddr"}},
|
||||||
"OperandFormats":{
|
"OperandFormats":{
|
||||||
},
|
},
|
||||||
|
@@ -25,13 +25,20 @@ PrintInlineDciString = $013000
|
|||||||
.text $14,$00,"string with length/2"
|
.text $14,$00,"string with length/2"
|
||||||
jsl PrintInlineDciString
|
jsl PrintInlineDciString
|
||||||
.shift "DCI string"
|
.shift "DCI string"
|
||||||
jsr L10AB
|
jsr L10B5
|
||||||
jsr L110F
|
jsr L110F
|
||||||
jsr L1108
|
jsr L1108
|
||||||
brk #$01
|
brk #$01
|
||||||
.word data01
|
.word data01
|
||||||
brk #$02
|
brk #$02
|
||||||
.word data02
|
.word data02
|
||||||
|
nop
|
||||||
|
jsr L1085
|
||||||
|
.byte $24
|
||||||
|
L1085 .byte $a9
|
||||||
|
.byte $00
|
||||||
|
sta $ff
|
||||||
|
.byte $ea
|
||||||
rts
|
rts
|
||||||
|
|
||||||
PrintInline8String rts
|
PrintInline8String rts
|
||||||
@@ -62,8 +69,8 @@ data02 .word data03
|
|||||||
.enc sg_hiascii
|
.enc sg_hiascii
|
||||||
data03 .text "AllEight"
|
data03 .text "AllEight"
|
||||||
|
|
||||||
L10AB jsr PrintInlineNullString
|
L10B5 jsr PrintInlineNullString ;split across address change
|
||||||
per $8023
|
per $802d
|
||||||
rtl
|
rtl
|
||||||
|
|
||||||
.byte $65
|
.byte $65
|
||||||
|
@@ -18,13 +18,20 @@ PrintInlineDciString equ $013000
|
|||||||
strl 'string with length/2'
|
strl 'string with length/2'
|
||||||
jsl PrintInlineDciString
|
jsl PrintInlineDciString
|
||||||
dci 'DCI string'
|
dci 'DCI string'
|
||||||
jsr L10AB
|
jsr L10B5
|
||||||
jsr L110F
|
jsr L110F
|
||||||
jsr L1108
|
jsr L1108
|
||||||
brk $01
|
brk $01
|
||||||
dw data01
|
dw data01
|
||||||
brk $02
|
brk $02
|
||||||
dw data02
|
dw data02
|
||||||
|
nop
|
||||||
|
jsr L1085
|
||||||
|
dfb $24
|
||||||
|
L1085 dfb $a9
|
||||||
|
dfb $00
|
||||||
|
sta $ff
|
||||||
|
dfb $ea
|
||||||
rts
|
rts
|
||||||
|
|
||||||
PrintInline8String rts
|
PrintInline8String rts
|
||||||
@@ -54,8 +61,8 @@ data02 dw data03
|
|||||||
dfb $80
|
dfb $80
|
||||||
data03 asc "AllEight"
|
data03 asc "AllEight"
|
||||||
|
|
||||||
L10AB jsr PrintInlineNullString
|
L10B5 jsr PrintInlineNullString ;split across address change
|
||||||
per $8023
|
per $802d
|
||||||
rtl
|
rtl
|
||||||
|
|
||||||
dfb $65
|
dfb $65
|
||||||
|
@@ -21,13 +21,20 @@ PrintInlineDciString = $013000
|
|||||||
!text $14,$00,"string with length/2"
|
!text $14,$00,"string with length/2"
|
||||||
jsl PrintInlineDciString
|
jsl PrintInlineDciString
|
||||||
!text "DCI strin",$e7
|
!text "DCI strin",$e7
|
||||||
jsr L10AB
|
jsr L10B5
|
||||||
jsr L110F
|
jsr L110F
|
||||||
jsr L1108
|
jsr L1108
|
||||||
!byte $00,$01
|
!byte $00,$01
|
||||||
!word data01
|
!word data01
|
||||||
!byte $00,$02
|
!byte $00,$02
|
||||||
!word data02
|
!word data02
|
||||||
|
nop
|
||||||
|
jsr L1085
|
||||||
|
!byte $24
|
||||||
|
L1085 !byte $a9
|
||||||
|
!byte $00
|
||||||
|
sta $ff
|
||||||
|
!byte $ea
|
||||||
rts
|
rts
|
||||||
|
|
||||||
PrintInline8String rts
|
PrintInline8String rts
|
||||||
@@ -59,8 +66,8 @@ data02 !word data03
|
|||||||
data03 !text "AllEight"
|
data03 !text "AllEight"
|
||||||
}
|
}
|
||||||
|
|
||||||
L10AB jsr PrintInlineNullString
|
L10B5 jsr PrintInlineNullString ;split across address change
|
||||||
per $8023
|
per $802d
|
||||||
rtl
|
rtl
|
||||||
|
|
||||||
!byte $65
|
!byte $65
|
||||||
|
@@ -22,13 +22,20 @@ PrintInlineDciString = $013000
|
|||||||
.byte $14,$00,"string with length/2"
|
.byte $14,$00,"string with length/2"
|
||||||
jsl PrintInlineDciString
|
jsl PrintInlineDciString
|
||||||
.byte "DCI strin",$e7
|
.byte "DCI strin",$e7
|
||||||
jsr L10AB
|
jsr L10B5
|
||||||
jsr L110F
|
jsr L110F
|
||||||
jsr L1108
|
jsr L1108
|
||||||
.byte $00,$01
|
brk $01
|
||||||
.word data01
|
.word data01
|
||||||
.byte $00,$02
|
brk $02
|
||||||
.word data02
|
.word data02
|
||||||
|
nop
|
||||||
|
jsr L1085
|
||||||
|
.byte $24
|
||||||
|
L1085: .byte $a9
|
||||||
|
.byte $00
|
||||||
|
sta $ff
|
||||||
|
.byte $ea
|
||||||
rts
|
rts
|
||||||
|
|
||||||
PrintInline8String: rts
|
PrintInline8String: rts
|
||||||
@@ -63,8 +70,8 @@ data02: .word data03
|
|||||||
.endmacro
|
.endmacro
|
||||||
data03: HiAscii "AllEight"
|
data03: HiAscii "AllEight"
|
||||||
|
|
||||||
L10AB: jsr PrintInlineNullString
|
L10B5: jsr PrintInlineNullString ;split across address change
|
||||||
per $8023
|
per $802d
|
||||||
rtl
|
rtl
|
||||||
|
|
||||||
.byte $65
|
.byte $65
|
||||||
@@ -79,7 +86,7 @@ L10AB: jsr PrintInlineNullString
|
|||||||
|
|
||||||
L1108: jsl PrintInlineL2String
|
L1108: jsl PrintInlineL2String
|
||||||
asl A
|
asl A
|
||||||
.byte $00,$60
|
brk $60
|
||||||
|
|
||||||
L110F: jsr PrintInlineNullString
|
L110F: jsr PrintInlineNullString
|
||||||
adc $6e
|
adc $6e
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
# 6502bench SourceGen generated linker script for 2022-extension-scripts
|
# 6502bench SourceGen generated linker script for 2022-extension-scripts
|
||||||
MEMORY {
|
MEMORY {
|
||||||
MAIN: file=%O, start=%S, size=65536;
|
MAIN: file=%O, start=%S, size=65536;
|
||||||
# MEM000: file=%O, start=$1000, size=182;
|
# MEM000: file=%O, start=$1000, size=192;
|
||||||
# MEM001: file=%O, start=$1100, size=21;
|
# MEM001: file=%O, start=$1100, size=21;
|
||||||
}
|
}
|
||||||
SEGMENTS {
|
SEGMENTS {
|
||||||
|
@@ -43,6 +43,29 @@ PrintInlineDciString equ $013000 ;EDIT: add to project symbols
|
|||||||
brk $02
|
brk $02
|
||||||
dw data02
|
dw data02
|
||||||
|
|
||||||
|
; Handle an edge case where the inline formatting gets thrown out.
|
||||||
|
; Two paths: BIT $A9 / BRK $85 / inline $FF/EA goes first, then
|
||||||
|
; LDA #$00 / STA $FF / NOP goes. When we get to the STA we notice
|
||||||
|
; that it's marked as inline data, so we remove the flag from $85 $ff
|
||||||
|
; but not from $ea.
|
||||||
|
;
|
||||||
|
; If we try to walk through the file, advancing offset by the anattrib
|
||||||
|
; length, we will traverse the first path, which (with 2-byte BRKs)
|
||||||
|
; runs into the $FF, which is marked as an instruction but not an
|
||||||
|
; instruction start.
|
||||||
|
;
|
||||||
|
; Switching to 1-byte BRKs makes the $85 an inline data item rather
|
||||||
|
; than an instruction. When we come back through, we LDA #$00 and
|
||||||
|
; then skip over the next 3 bytes. No conflict.
|
||||||
|
nop
|
||||||
|
jsr edge1 ;alt path, evaluated later
|
||||||
|
dfb $24 ;1: BIT dp
|
||||||
|
edge1 dfb $a9 ;2: LDA imm
|
||||||
|
brk ;1: BRK <op>
|
||||||
|
dfb $85 ;2: STA dp
|
||||||
|
dfb $ff ;1: address $eaff
|
||||||
|
nop ;2:
|
||||||
|
|
||||||
rts
|
rts
|
||||||
|
|
||||||
PrintInline8String rts ;EDIT: set label
|
PrintInline8String rts ;EDIT: set label
|
||||||
|
Reference in New Issue
Block a user