mirror of
https://github.com/fadden/6502bench.git
synced 2025-01-16 19:32:31 +00:00
Correctly handle embedded instruction edge case
This began with a change to support "BRK <operand>" in cc65. The assembler only supports this for 65816 projects, so we detect that and enable it when available. While fiddling with some test code an assertion fired. This revealed a minor issue in the code analyzer: when overwriting inline data with instructions, we weren't resetting the format descriptor. The code that exercises it, which requires two-byte BRKs and an inline BRK handler in an extension script, has been added to test 2022-extension-scripts. The new regression test revealed a flaw in the 64tass code generator's character encoding scanner that caused it to hang. Fixed.
This commit is contained in:
parent
e5da5ced95
commit
b6e571afc2
@ -23,10 +23,11 @@ namespace SourceGen {
|
||||
/// <summary>
|
||||
/// Analyzer attribute holder. Contains the output of the instruction and data analyzers.
|
||||
/// Every byte in the input file has one of these associated with it.
|
||||
///
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// (Yes, it's a mutable struct. Yes, that fact has bitten me a few times. The array
|
||||
/// of these may have millions of elements, so the reduction in overhead seems worthwhile.)
|
||||
/// </summary>
|
||||
/// </remarks>
|
||||
public struct Anattrib {
|
||||
[FlagsAttribute]
|
||||
private enum AttribFlags {
|
||||
@ -351,5 +352,22 @@ namespace SourceGen {
|
||||
sb.Append(IsBranchTarget ? '>' : blank);
|
||||
return sb.ToString();
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds a short description of this attribute entry. The string starts with a type
/// tag -- "Inst" if IsInstruction, otherwise "Data" if IsData, otherwise "Inli" if
/// IsInlineData, otherwise nothing -- followed by "Start" when IsStart is set, and
/// always ends with " len=" and the Length value.
/// </summary>
/// <returns>The description, for example "InstStart len=2" when IsInstruction and
/// IsStart are set and Length is 2.</returns>
public override string ToString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
if (IsInstruction) {
|
||||
sb.Append("Inst");
|
||||
} else if (IsData) {
|
||||
sb.Append("Data");
|
||||
} else if (IsInlineData) {
|
||||
sb.Append("Inli");
|
||||
}
|
||||
if (IsStart) {
|
||||
sb.Append("Start");
|
||||
}
|
||||
sb.Append(" len=");
|
||||
sb.Append(Length);
|
||||
return sb.ToString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -326,11 +326,20 @@ namespace SourceGen.AsmGen {
|
||||
|
||||
// IGenerator
|
||||
public string ModifyOpcode(int offset, OpDef op) {
|
||||
if (op == OpDef.OpBRK_StackInt || (op == OpDef.OpWDM_WDM && mAsmVersion < V2_18)) {
|
||||
// cc65 v2.17 doesn't support WDM, and assembles BRK <arg> to opcode $05.
|
||||
// cc65 v2.18 only supports two-byte BRK on 65816 code.
|
||||
if (op == OpDef.OpBRK_StackInt) {
|
||||
if (mAsmVersion < V2_18) {
|
||||
// cc65 v2.17 assembles BRK <arg> to opcode $05
|
||||
// https://github.com/cc65/cc65/issues/716
|
||||
return null;
|
||||
} else if (Project.CpuDef.Type != CpuDef.CpuType.Cpu65816) {
|
||||
// cc65 v2.18 only supports BRK <arg> on 65816 (?!)
|
||||
return null;
|
||||
} else {
|
||||
return string.Empty;
|
||||
}
|
||||
} else if (op == OpDef.OpWDM_WDM && mAsmVersion < V2_18) {
|
||||
// cc65 v2.17 doesn't support WDM
|
||||
// https://github.com/cc65/cc65/issues/715
|
||||
// https://github.com/cc65/cc65/issues/716
|
||||
return null;
|
||||
} else if (op.IsUndocumented) {
|
||||
if (sUndocMap.TryGetValue(op.Mnemonic, out string newValue)) {
|
||||
|
@ -309,7 +309,19 @@ namespace SourceGen.AsmGen {
|
||||
return;
|
||||
}
|
||||
|
||||
offset += attr.Length;
|
||||
if (attr.IsInstructionStart) {
|
||||
// look for embedded instructions, which might have formatted char data
|
||||
int len;
|
||||
for (len = 1; len < attr.Length; len++) {
|
||||
if (Project.GetAnattrib(offset + len).IsInstructionStart) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
offset += len;
|
||||
} else {
|
||||
// data items
|
||||
offset += attr.Length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -534,9 +534,11 @@ namespace SourceGen {
|
||||
if (mAnattribs[i].IsData) {
|
||||
LogW(i, "Stripping mid-instruction data flag");
|
||||
mAnattribs[i].IsData = false;
|
||||
mAnattribs[i].DataDescriptor = null;
|
||||
} else if (mAnattribs[i].IsInlineData) {
|
||||
LogW(i, "Stripping mid-instruction inline-data flag");
|
||||
mAnattribs[i].IsInlineData = false;
|
||||
mAnattribs[i].DataDescriptor = null;
|
||||
}
|
||||
mAnattribs[i].IsInstruction = true;
|
||||
}
|
||||
|
@ -607,7 +607,8 @@ namespace SourceGen {
|
||||
/// </summary>
|
||||
public void Validate() {
|
||||
// Confirm that we can walk through the file, stepping directly from the start
|
||||
// of one thing to the start of the next.
|
||||
// of one thing to the start of the next. We won't normally do this, because
|
||||
// we need to watch for embedded instructions.
|
||||
int offset = 0;
|
||||
while (offset < mFileData.Length) {
|
||||
Anattrib attr = mAnattribs[offset];
|
||||
@ -619,6 +620,9 @@ namespace SourceGen {
|
||||
// Sometimes embedded instructions continue past the "outer" instruction,
|
||||
// usually because we're misinterpreting the code. We need to deal with
|
||||
// that here.
|
||||
//
|
||||
// One fun way to cause this is to have inline data from a plugin that got
|
||||
// overwritten by the code analyzer. See test 2022 for an example.
|
||||
int extraInstrBytes = 0;
|
||||
while (offset < mFileData.Length && mAnattribs[offset].IsInstruction &&
|
||||
!mAnattribs[offset].IsInstructionStart) {
|
||||
|
Binary file not shown.
@ -34,7 +34,7 @@ namespace RuntimeData.Test2022 {
|
||||
return;
|
||||
}
|
||||
byte func = mFileData[offset + 1];
|
||||
if (func < 0x01 || func > 0x02) {
|
||||
if (func != 0x85 && (func < 0x01 || func > 0x02)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -91,6 +91,9 @@ namespace RuntimeData.Test2022 {
|
||||
mAppRef.SetInlineDataFormat(nextOff, 8, DataType.StringGeneric,
|
||||
DataSubType.HighAscii, null);
|
||||
break;
|
||||
case 0x85:
|
||||
// do nothing further
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,37 +1,37 @@
|
||||
### 6502bench SourceGen dis65 v1.0 ###
|
||||
{
|
||||
"_ContentVersion":2,"FileDataLength":203,"FileDataCrc32":-1621468157,"ProjectProps":{
|
||||
"_ContentVersion":2,"FileDataLength":213,"FileDataCrc32":-798098677,"ProjectProps":{
|
||||
"CpuName":"65816","IncludeUndocumentedInstr":false,"TwoByteBrk":true,"EntryFlags":32702671,"AutoLabelStyle":"Simple","AnalysisParams":{
|
||||
"AnalyzeUncategorizedData":true,"DefaultTextScanMode":"LowHighAscii","MinCharsForString":4,"SeekNearbyTargets":true,"SmartPlpHandling":true},
|
||||
"PlatformSymbolFileIdentifiers":["PROJ:2022-extension-scripts.sym65"],"ExtensionScriptFileIdentifiers":["PROJ:2022-extension-scripts-a.cs","PROJ:2022-extension-scripts-b.cs"],"ProjectSyms":{
|
||||
"PrintInlineDciString":{
|
||||
"DataDescriptor":{
|
||||
"Length":1,"Format":"NumericLE","SubFormat":"Hex","SymbolRef":null},
|
||||
"Comment":"","HasWidth":false,"Label":"PrintInlineDciString","Value":77824,"Source":"Project","Type":"ExternalAddr"}}},
|
||||
"Comment":"","HasWidth":false,"Direction":"ReadWrite","MultiMask":null,"Label":"PrintInlineDciString","Value":77824,"Source":"Project","Type":"ExternalAddr"}}},
|
||||
"AddressMap":[{
|
||||
"Offset":0,"Addr":4096},
|
||||
{
|
||||
"Offset":182,"Addr":4352}],"TypeHints":[{
|
||||
"Offset":192,"Addr":4352}],"TypeHints":[{
|
||||
"Low":0,"High":0,"Hint":"Code"}],"StatusFlagOverrides":{
|
||||
},
|
||||
"Comments":{
|
||||
},
|
||||
"181":"split across address change"},
|
||||
"LongComments":{
|
||||
},
|
||||
"Notes":{
|
||||
},
|
||||
"UserLabels":{
|
||||
"129":{
|
||||
"139":{
|
||||
"Label":"PrintInline8String","Value":4225,"Source":"User","Type":"LocalOrGlobalAddr"},
|
||||
"130":{
|
||||
"140":{
|
||||
"Label":"PrintInlineRev8String","Value":4226,"Source":"User","Type":"LocalOrGlobalAddr"},
|
||||
"131":{
|
||||
"141":{
|
||||
"Label":"PrintInlineNullString","Value":4227,"Source":"User","Type":"LocalOrGlobalAddr"},
|
||||
"160":{
|
||||
"170":{
|
||||
"Label":"data02","Value":4256,"Source":"User","Type":"LocalOrGlobalAddr"},
|
||||
"163":{
|
||||
"173":{
|
||||
"Label":"data03","Value":4259,"Source":"User","Type":"LocalOrGlobalAddr"},
|
||||
"132":{
|
||||
"142":{
|
||||
"Label":"data01","Value":4228,"Source":"User","Type":"LocalOrGlobalAddr"}},
|
||||
"OperandFormats":{
|
||||
},
|
||||
|
@ -25,13 +25,20 @@ PrintInlineDciString = $013000
|
||||
.text $14,$00,"string with length/2"
|
||||
jsl PrintInlineDciString
|
||||
.shift "DCI string"
|
||||
jsr L10AB
|
||||
jsr L10B5
|
||||
jsr L110F
|
||||
jsr L1108
|
||||
brk #$01
|
||||
.word data01
|
||||
brk #$02
|
||||
.word data02
|
||||
nop
|
||||
jsr L1085
|
||||
.byte $24
|
||||
L1085 .byte $a9
|
||||
.byte $00
|
||||
sta $ff
|
||||
.byte $ea
|
||||
rts
|
||||
|
||||
PrintInline8String rts
|
||||
@ -62,8 +69,8 @@ data02 .word data03
|
||||
.enc sg_hiascii
|
||||
data03 .text "AllEight"
|
||||
|
||||
L10AB jsr PrintInlineNullString
|
||||
per $8023
|
||||
L10B5 jsr PrintInlineNullString ;split across address change
|
||||
per $802d
|
||||
rtl
|
||||
|
||||
.byte $65
|
||||
|
@ -18,13 +18,20 @@ PrintInlineDciString equ $013000
|
||||
strl 'string with length/2'
|
||||
jsl PrintInlineDciString
|
||||
dci 'DCI string'
|
||||
jsr L10AB
|
||||
jsr L10B5
|
||||
jsr L110F
|
||||
jsr L1108
|
||||
brk $01
|
||||
dw data01
|
||||
brk $02
|
||||
dw data02
|
||||
nop
|
||||
jsr L1085
|
||||
dfb $24
|
||||
L1085 dfb $a9
|
||||
dfb $00
|
||||
sta $ff
|
||||
dfb $ea
|
||||
rts
|
||||
|
||||
PrintInline8String rts
|
||||
@ -54,8 +61,8 @@ data02 dw data03
|
||||
dfb $80
|
||||
data03 asc "AllEight"
|
||||
|
||||
L10AB jsr PrintInlineNullString
|
||||
per $8023
|
||||
L10B5 jsr PrintInlineNullString ;split across address change
|
||||
per $802d
|
||||
rtl
|
||||
|
||||
dfb $65
|
||||
|
@ -21,13 +21,20 @@ PrintInlineDciString = $013000
|
||||
!text $14,$00,"string with length/2"
|
||||
jsl PrintInlineDciString
|
||||
!text "DCI strin",$e7
|
||||
jsr L10AB
|
||||
jsr L10B5
|
||||
jsr L110F
|
||||
jsr L1108
|
||||
!byte $00,$01
|
||||
!word data01
|
||||
!byte $00,$02
|
||||
!word data02
|
||||
nop
|
||||
jsr L1085
|
||||
!byte $24
|
||||
L1085 !byte $a9
|
||||
!byte $00
|
||||
sta $ff
|
||||
!byte $ea
|
||||
rts
|
||||
|
||||
PrintInline8String rts
|
||||
@ -59,8 +66,8 @@ data02 !word data03
|
||||
data03 !text "AllEight"
|
||||
}
|
||||
|
||||
L10AB jsr PrintInlineNullString
|
||||
per $8023
|
||||
L10B5 jsr PrintInlineNullString ;split across address change
|
||||
per $802d
|
||||
rtl
|
||||
|
||||
!byte $65
|
||||
|
@ -22,13 +22,20 @@ PrintInlineDciString = $013000
|
||||
.byte $14,$00,"string with length/2"
|
||||
jsl PrintInlineDciString
|
||||
.byte "DCI strin",$e7
|
||||
jsr L10AB
|
||||
jsr L10B5
|
||||
jsr L110F
|
||||
jsr L1108
|
||||
.byte $00,$01
|
||||
brk $01
|
||||
.word data01
|
||||
.byte $00,$02
|
||||
brk $02
|
||||
.word data02
|
||||
nop
|
||||
jsr L1085
|
||||
.byte $24
|
||||
L1085: .byte $a9
|
||||
.byte $00
|
||||
sta $ff
|
||||
.byte $ea
|
||||
rts
|
||||
|
||||
PrintInline8String: rts
|
||||
@ -63,8 +70,8 @@ data02: .word data03
|
||||
.endmacro
|
||||
data03: HiAscii "AllEight"
|
||||
|
||||
L10AB: jsr PrintInlineNullString
|
||||
per $8023
|
||||
L10B5: jsr PrintInlineNullString ;split across address change
|
||||
per $802d
|
||||
rtl
|
||||
|
||||
.byte $65
|
||||
@ -79,7 +86,7 @@ L10AB: jsr PrintInlineNullString
|
||||
|
||||
L1108: jsl PrintInlineL2String
|
||||
asl A
|
||||
.byte $00,$60
|
||||
brk $60
|
||||
|
||||
L110F: jsr PrintInlineNullString
|
||||
adc $6e
|
||||
|
@ -1,7 +1,7 @@
|
||||
# 6502bench SourceGen generated linker script for 2022-extension-scripts
|
||||
MEMORY {
|
||||
MAIN: file=%O, start=%S, size=65536;
|
||||
# MEM000: file=%O, start=$1000, size=182;
|
||||
# MEM000: file=%O, start=$1000, size=192;
|
||||
# MEM001: file=%O, start=$1100, size=21;
|
||||
}
|
||||
SEGMENTS {
|
||||
|
@ -43,6 +43,29 @@ PrintInlineDciString equ $013000 ;EDIT: add to project symbols
|
||||
brk $02
|
||||
dw data02
|
||||
|
||||
; Handle an edge case where the inline formatting gets thrown out.
|
||||
; Two paths: BIT $A9 / BRK $85 / inline $FF/EA goes first, then
|
||||
; LDA $00 / STA $FF / NOP goes. When we get to the STA we notice
|
||||
; that it's marked as inline data, so we remove it from $85 $ff
|
||||
; but not from $ea.
|
||||
;
|
||||
; If we try to walk through the file, advancing offset by the anattrib
|
||||
; length, we will traverse the first path, which (with 2-byte BRKs)
|
||||
; runs into the $FF, which is marked as an instruction but not an
|
||||
; instruction start.
|
||||
;
|
||||
; Switching to 1-byte BRKs makes the $85 an inline data item rather
|
||||
; than an instruction. When we come back through, we LDA $00 and
|
||||
; then skip over the next 3 bytes. No conflict.
|
||||
nop
|
||||
jsr edge1 ;alt path, evaluated later
|
||||
dfb $24 ;1: BIT dp
|
||||
edge1 dfb $a9 ;2: LDA imm
|
||||
brk ;1: BRK <op>
|
||||
dfb $85 ;2: STA dp
|
||||
dfb $ff ;1: address $eaff
|
||||
nop ;2:
|
||||
|
||||
rts
|
||||
|
||||
PrintInline8String rts ;EDIT: set label
|
||||
|
Loading…
x
Reference in New Issue
Block a user