From 3368182e1475510bf68ea67382bd43ffcf0ec771 Mon Sep 17 00:00:00 2001 From: Andy McFadden Date: Sun, 8 Aug 2021 15:38:39 -0700 Subject: [PATCH] Allow single-character DCI strings The DCI string format uses character values where the high bit of the last byte differs from the rest of the string. Usually all the high bits are clear except on the last byte, but SourceGen generally allows either polarity. This gets a little uncertain with single-character strings, because SourceGen can't auto-detect DCI very effectively. A series of bytes with the high bit set could be a single high-ASCII string or a series of single-byte DCI strings. The motivation for allowing them is C64 PETSCII. While ASCII allows "high ASCII" as an escape hatch, PETSCII doesn't have that option, so there's no way to mark the data as a character or a string. We still want to do a bit of screening, but if the user specifies a non-ASCII character set and the selected bytes have their high bits set, we want to just treat the whole set as 1-byte DCI. Some minor adjustments were needed for a couple of validity checks that expected longer strings. This adds some short DCI strings in different character sets to the char-encoding regression tests. 
(for issue #102) --- SourceGen/AsmGen/AsmTass64.cs | 2 +- SourceGen/DataAnalysis.cs | 42 ++-- SourceGen/SGTestData/20120-char-encoding-a | Bin 1244 -> 1299 bytes .../SGTestData/20120-char-encoding-a.dis65 | 124 +++++++++- SourceGen/SGTestData/20130-char-encoding-p | Bin 1244 -> 1299 bytes .../SGTestData/20130-char-encoding-p.dis65 | 232 +++++++++++++++++- SourceGen/SGTestData/20140-char-encoding-s | Bin 1244 -> 1299 bytes .../SGTestData/20140-char-encoding-s.dis65 | 124 +++++++++- .../Expected/20120-char-encoding-a_64tass.S | 35 +++ .../Expected/20120-char-encoding-a_acme.S | 30 +++ .../Expected/20120-char-encoding-a_cc65.S | 30 +++ .../Expected/20120-char-encoding-a_cc65.cfg | 2 +- .../Expected/20120-char-encoding-a_merlin32.S | 30 +++ .../Expected/20130-char-encoding-p_64tass.S | 65 ++++- .../Expected/20130-char-encoding-p_acme.S | 57 ++++- .../Expected/20130-char-encoding-p_cc65.S | 57 ++++- .../Expected/20130-char-encoding-p_cc65.cfg | 2 +- .../Expected/20130-char-encoding-p_merlin32.S | 57 ++++- .../Expected/20140-char-encoding-s_64tass.S | 35 +++ .../Expected/20140-char-encoding-s_acme.S | 30 +++ .../Expected/20140-char-encoding-s_cc65.S | 30 +++ .../Expected/20140-char-encoding-s_cc65.cfg | 2 +- .../Expected/20140-char-encoding-s_merlin32.S | 30 +++ .../SGTestData/Source/20120-char-encoding.S | 51 +++- SourceGen/WpfGui/EditDataOperand.xaml.cs | 14 +- 25 files changed, 1008 insertions(+), 73 deletions(-) diff --git a/SourceGen/AsmGen/AsmTass64.cs b/SourceGen/AsmGen/AsmTass64.cs index 1650afa..ef25f5c 100644 --- a/SourceGen/AsmGen/AsmTass64.cs +++ b/SourceGen/AsmGen/AsmTass64.cs @@ -797,7 +797,7 @@ namespace SourceGen.AsmGen { break; case FormatDescriptor.Type.StringDci: opcodeStr = sDataOpNames.StrDci; - if ((Project.FileData[offset] & 0x80) != 0) { + if ((Project.FileData[offset + dfd.Length - 1] & 0x80) == 0) { // ".shift" directive only works for strings where the high bit starts // clear and ends high. 
OutputNoJoy(offset, dfd.Length, labelStr, commentStr); diff --git a/SourceGen/DataAnalysis.cs b/SourceGen/DataAnalysis.cs index 6247e37..cd92601 100644 --- a/SourceGen/DataAnalysis.cs +++ b/SourceGen/DataAnalysis.cs @@ -1187,8 +1187,13 @@ namespace SourceGen { /// Counts strings in Dextral Character Inverted format, meaning the high bit on the /// last byte is the opposite of the preceding. /// - /// Each string must be at least two bytes. To reduce false-positives, we require - /// that all strings have the same hi/lo pattern. + /// To reduce false-positives, we require that all strings have the same hi/lo pattern. + /// + /// Single-character strings are allowed for C64 PETSCII, which doesn't have an + /// equivalent to "high ASCII" character formatting, so long as the terminating + /// character value has its high bit set. Without this restriction, any collection + /// of characters is just a list of DCI strings, which is a weird thing to offer up + /// in the UI. /// /// /// For C64Petscii, this will identify strings that are entirely in lower case except @@ -1201,22 +1206,21 @@ namespace SourceGen { /// Number of strings found, or -1 if bad data identified. public static int RecognizeDciStrings(byte[] fileData, int start, int end, CharEncoding.InclusionTest charTest) { - int expectedHiBit = fileData[start] & 0x80; + int endHiBit = fileData[end] & 0x80; int stringCount = 0; int stringLen = 0; - // Quick test on last byte. - if ((fileData[end] & 0x80) == expectedHiBit) { - return -1; - } - for (int i = start; i <= end; i++) { byte val = fileData[i]; - if ((val & 0x80) != expectedHiBit) { + if ((val & 0x80) == endHiBit) { // end of string if (stringLen == 0) { - // Got two consecutive bytes with end-marker polarity... fail. - return -1; + // Got two consecutive bytes with end-marker polarity. Allow if the + // end char high bit is set. Otherwise it's just a sequence of + // regular characters. 
+ if (endHiBit == 0) { + return -1; + } } stringCount++; stringLen = 0; @@ -1230,6 +1234,12 @@ namespace SourceGen { //} } + bool isAscii = charTest(0x5c); // temporary hack + if (isAscii && stringCount == end - start + 1) { + // Entire region is single-character strings. Don't allow for ASCII or + // high ASCII. + return -1; + } return stringCount; } @@ -1330,10 +1340,10 @@ namespace SourceGen { } return true; case FormatDescriptor.Type.StringDci: - if (length < 2) { - failMsg = Res.Strings.STR_VFY_DCI_SHORT; - return false; - } + //if (length < 2) { + // failMsg = Res.Strings.STR_VFY_DCI_SHORT; + // return false; + //} byte first = (byte)(fileData[offset] & 0x80); for (int i = offset + 1; i < offset + length - 1; i++) { if ((fileData[i] & 0x80) != first) { @@ -1341,7 +1351,7 @@ namespace SourceGen { return false; } } - if ((fileData[offset + length - 1] & 0x80) == first) { + if (length > 1 && (fileData[offset + length - 1] & 0x80) == first) { failMsg = Res.Strings.STR_VFY_DCI_NOT_TERMINATED; return false; } diff --git a/SourceGen/SGTestData/20120-char-encoding-a b/SourceGen/SGTestData/20120-char-encoding-a index f2e00aa884c241d0ff1381c6dc46ed9e5c025e77..eaac713643fa32011a3a6461605e81fabc1c0153 100644 GIT binary patch delta 64 zcmcb^Ihkw2Z59h3LD4pk)SR669w%=sY&-Gv&71rao)L;|9({J8y}zm delta 9 QcmbQtb%%4qZ5GA^022rUa{vGU diff --git a/SourceGen/SGTestData/20120-char-encoding-a.dis65 b/SourceGen/SGTestData/20120-char-encoding-a.dis65 index 6bbfae8..662918d 100644 --- a/SourceGen/SGTestData/20120-char-encoding-a.dis65 +++ b/SourceGen/SGTestData/20120-char-encoding-a.dis65 @@ -1,8 +1,8 @@ ### 6502bench SourceGen dis65 v1.0 ### { "_ContentVersion":4, -"FileDataLength":1244, -"FileDataCrc32":220973587, +"FileDataLength":1299, +"FileDataCrc32":-371479809, "ProjectProps":{ "CpuName":"6502", "IncludeUndocumentedInstr":false, @@ -666,6 +666,126 @@ "Length":33, "Format":"StringGeneric", "SubFormat":"HighAscii", +"SymbolRef":null}, + +"1247":{ +"Length":5, +"Format":"StringDci", 
+"SubFormat":"Ascii", +"SymbolRef":null}, + +"1252":{ +"Length":2, +"Format":"StringDci", +"SubFormat":"Ascii", +"SymbolRef":null}, + +"1254":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"Ascii", +"SymbolRef":null}, + +"1255":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"Ascii", +"SymbolRef":null}, + +"1257":{ +"Length":5, +"Format":"StringDci", +"SubFormat":"HighAscii", +"SymbolRef":null}, + +"1262":{ +"Length":2, +"Format":"StringDci", +"SubFormat":"HighAscii", +"SymbolRef":null}, + +"1264":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"Ascii", +"SymbolRef":null}, + +"1265":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"Ascii", +"SymbolRef":null}, + +"1267":{ +"Length":5, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1272":{ +"Length":2, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1274":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1275":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1277":{ +"Length":5, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1282":{ +"Length":2, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1284":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1285":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1287":{ +"Length":5, +"Format":"StringDci", +"SubFormat":"C64Screen", +"SymbolRef":null}, + +"1292":{ +"Length":2, +"Format":"StringDci", +"SubFormat":"C64Screen", +"SymbolRef":null}, + +"1294":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"C64Screen", +"SymbolRef":null}, + +"1295":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"C64Screen", "SymbolRef":null}}, "LvTables":{ diff --git a/SourceGen/SGTestData/20130-char-encoding-p b/SourceGen/SGTestData/20130-char-encoding-p index 
f2e00aa884c241d0ff1381c6dc46ed9e5c025e77..eaac713643fa32011a3a6461605e81fabc1c0153 100644 GIT binary patch delta 64 zcmcb^Ihkw2Z59h3LD4pk)SR669w%=sY&-Gv&71rao)L;|9({J8y}zm delta 9 QcmbQtb%%4qZ5GA^022rUa{vGU diff --git a/SourceGen/SGTestData/20130-char-encoding-p.dis65 b/SourceGen/SGTestData/20130-char-encoding-p.dis65 index da9e83a..7c95071 100644 --- a/SourceGen/SGTestData/20130-char-encoding-p.dis65 +++ b/SourceGen/SGTestData/20130-char-encoding-p.dis65 @@ -1,8 +1,8 @@ ### 6502bench SourceGen dis65 v1.0 ### { "_ContentVersion":4, -"FileDataLength":1244, -"FileDataCrc32":220973587, +"FileDataLength":1299, +"FileDataCrc32":-371479809, "ProjectProps":{ "CpuName":"6502", "IncludeUndocumentedInstr":false, @@ -554,10 +554,238 @@ "SubFormat":"C64Petscii", "SymbolRef":null}, +"1175":{ +"Length":12, +"Format":"StringGeneric", +"SubFormat":"C64Screen", +"SymbolRef":null}, + +"1187":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"Hex", +"SymbolRef":null}, + +"1188":{ +"Length":2, +"Format":"StringGeneric", +"SubFormat":"C64Screen", +"SymbolRef":null}, + +"1190":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"Hex", +"SymbolRef":null}, + +"1191":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"Hex", +"SymbolRef":null}, + +"1192":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"C64Screen", +"SymbolRef":null}, + +"1193":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"Hex", +"SymbolRef":null}, + +"1194":{ +"Length":2, +"Format":"StringGeneric", +"SubFormat":"C64Screen", +"SymbolRef":null}, + +"1196":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"Hex", +"SymbolRef":null}, + +"1197":{ +"Length":2, +"Format":"StringGeneric", +"SubFormat":"C64Screen", +"SymbolRef":null}, + +"1199":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"Hex", +"SymbolRef":null}, + +"1200":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"Hex", +"SymbolRef":null}, + +"1201":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"C64Screen", +"SymbolRef":null}, + +"1202":{ +"Length":1, 
+"Format":"NumericLE", +"SubFormat":"Hex", +"SymbolRef":null}, + +"1203":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"Hex", +"SymbolRef":null}, + +"1204":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"C64Screen", +"SymbolRef":null}, + +"1205":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"Hex", +"SymbolRef":null}, + +"1206":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"C64Screen", +"SymbolRef":null}, + "1208":{ "Length":33, "Format":"StringGeneric", "SubFormat":"HighAscii", +"SymbolRef":null}, + +"1247":{ +"Length":5, +"Format":"StringDci", +"SubFormat":"Ascii", +"SymbolRef":null}, + +"1252":{ +"Length":2, +"Format":"StringDci", +"SubFormat":"Ascii", +"SymbolRef":null}, + +"1254":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"Ascii", +"SymbolRef":null}, + +"1255":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"Ascii", +"SymbolRef":null}, + +"1257":{ +"Length":5, +"Format":"StringDci", +"SubFormat":"HighAscii", +"SymbolRef":null}, + +"1262":{ +"Length":2, +"Format":"StringDci", +"SubFormat":"HighAscii", +"SymbolRef":null}, + +"1264":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"Ascii", +"SymbolRef":null}, + +"1265":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"Ascii", +"SymbolRef":null}, + +"1267":{ +"Length":5, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1272":{ +"Length":2, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1274":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1275":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1277":{ +"Length":5, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1282":{ +"Length":2, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1284":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1285":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"C64Petscii", 
+"SymbolRef":null}, + +"1287":{ +"Length":5, +"Format":"StringDci", +"SubFormat":"C64Screen", +"SymbolRef":null}, + +"1292":{ +"Length":2, +"Format":"StringDci", +"SubFormat":"C64Screen", +"SymbolRef":null}, + +"1294":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"C64Screen", +"SymbolRef":null}, + +"1295":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"C64Screen", "SymbolRef":null}}, "LvTables":{ diff --git a/SourceGen/SGTestData/20140-char-encoding-s b/SourceGen/SGTestData/20140-char-encoding-s index f2e00aa884c241d0ff1381c6dc46ed9e5c025e77..eaac713643fa32011a3a6461605e81fabc1c0153 100644 GIT binary patch delta 64 zcmcb^Ihkw2Z59h3LD4pk)SR669w%=sY&-Gv&71rao)L;|9({J8y}zm delta 9 QcmbQtb%%4qZ5GA^022rUa{vGU diff --git a/SourceGen/SGTestData/20140-char-encoding-s.dis65 b/SourceGen/SGTestData/20140-char-encoding-s.dis65 index 9b9afc6..7b9099b 100644 --- a/SourceGen/SGTestData/20140-char-encoding-s.dis65 +++ b/SourceGen/SGTestData/20140-char-encoding-s.dis65 @@ -1,8 +1,8 @@ ### 6502bench SourceGen dis65 v1.0 ### { "_ContentVersion":4, -"FileDataLength":1244, -"FileDataCrc32":220973587, +"FileDataLength":1299, +"FileDataCrc32":-371479809, "ProjectProps":{ "CpuName":"6502", "IncludeUndocumentedInstr":false, @@ -666,6 +666,126 @@ "Length":33, "Format":"StringGeneric", "SubFormat":"HighAscii", +"SymbolRef":null}, + +"1247":{ +"Length":5, +"Format":"StringDci", +"SubFormat":"Ascii", +"SymbolRef":null}, + +"1252":{ +"Length":2, +"Format":"StringDci", +"SubFormat":"Ascii", +"SymbolRef":null}, + +"1254":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"Ascii", +"SymbolRef":null}, + +"1255":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"Ascii", +"SymbolRef":null}, + +"1257":{ +"Length":5, +"Format":"StringDci", +"SubFormat":"HighAscii", +"SymbolRef":null}, + +"1262":{ +"Length":2, +"Format":"StringDci", +"SubFormat":"HighAscii", +"SymbolRef":null}, + +"1264":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"Ascii", +"SymbolRef":null}, + +"1265":{ +"Length":1, 
+"Format":"NumericLE", +"SubFormat":"Ascii", +"SymbolRef":null}, + +"1267":{ +"Length":5, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1272":{ +"Length":2, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1274":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1275":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1277":{ +"Length":5, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1282":{ +"Length":2, +"Format":"StringDci", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1284":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1285":{ +"Length":1, +"Format":"NumericLE", +"SubFormat":"C64Petscii", +"SymbolRef":null}, + +"1287":{ +"Length":5, +"Format":"StringDci", +"SubFormat":"C64Screen", +"SymbolRef":null}, + +"1292":{ +"Length":2, +"Format":"StringDci", +"SubFormat":"C64Screen", +"SymbolRef":null}, + +"1294":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"C64Screen", +"SymbolRef":null}, + +"1295":{ +"Length":1, +"Format":"StringDci", +"SubFormat":"C64Screen", "SymbolRef":null}}, "LvTables":{ diff --git a/SourceGen/SGTestData/Expected/20120-char-encoding-a_64tass.S b/SourceGen/SGTestData/Expected/20120-char-encoding-a_64tass.S index 6356296..470fda0 100644 --- a/SourceGen/SGTestData/Expected/20120-char-encoding-a_64tass.S +++ b/SourceGen/SGTestData/Expected/20120-char-encoding-a_64tass.S @@ -251,5 +251,40 @@ L144B nop .byte $80 _L14DA nop + jmp _L1511 + + .byte $86 + .enc "sg_ascii" + .shift "Hello" + .shift "HI" + .shift "X" + .shift "!" + .byte $86 + .enc "sg_hiascii" + .byte $c8,$e5,$ec,$ec,$6f + .byte $c8,$49 + .enc "sg_ascii" + .byte 'X' + .byte '!' + .byte $86 + .enc "none" + .shift "hello" + .shift "hi" + .shift "x" + .shift "!" + .byte $86 + .byte $c8,$c5,$cc,$cc,$4f + .byte $c8,$49 + .byte 'x' + .byte '!' 
+ .byte $86 + .enc "screen" + .shift "Hello" + .shift "HI" + .shift "X" + .shift "!" + .byte $86 + +_L1511 nop rts diff --git a/SourceGen/SGTestData/Expected/20120-char-encoding-a_acme.S b/SourceGen/SGTestData/Expected/20120-char-encoding-a_acme.S index 42e3f11..41d21a7 100644 --- a/SourceGen/SGTestData/Expected/20120-char-encoding-a_acme.S +++ b/SourceGen/SGTestData/Expected/20120-char-encoding-a_acme.S @@ -220,5 +220,35 @@ L144B nop !byte $80 @L14DA nop + jmp @L1511 + + !byte $86 + !text "Hell",$ef + !text "H",$c9 + !text $d8 + !text $a1 + !byte $86 + !hex c8e5ecec6f + !hex c849 + !byte 'X' + !byte '!' + !byte $86 + !pet "hellO" + !pet "hI" + !pet "X" + !pet $a1 + !byte $86 + !pet "HELLo" + !pet "Hi" + !byte $58 + !byte $21 + !byte $86 + !scr "Hell",$8f + !scr "H",$c9 + !scr $d8 + !scr $a1 + !byte $86 + +@L1511 nop rts diff --git a/SourceGen/SGTestData/Expected/20120-char-encoding-a_cc65.S b/SourceGen/SGTestData/Expected/20120-char-encoding-a_cc65.S index 9c87ac6..7503a0e 100644 --- a/SourceGen/SGTestData/Expected/20120-char-encoding-a_cc65.S +++ b/SourceGen/SGTestData/Expected/20120-char-encoding-a_cc65.S @@ -247,5 +247,35 @@ L144B: nop .byte $80 @L14DA: nop + jmp @L1511 + + .byte $86 + .byte "Hell",$ef + .byte "H",$c9 + .byte $d8 + .byte $a1 + .byte $86 + .byte $c8,$e5,$ec,$ec,$6f + .byte $c8,$49 + .byte 'X' + .byte '!' 
+ .byte $86 + .byte $48,$45,$4c,$4c,$cf + .byte $48,$c9 + .byte $d8 + .byte $a1 + .byte $86 + .byte $c8,$c5,$cc,$cc,$4f + .byte $c8,$49 + .byte $58 + .byte $21 + .byte $86 + .byte $48,$05,$0c,$0c,$8f + .byte $48,$c9 + .byte $d8 + .byte $a1 + .byte $86 + +@L1511: nop rts diff --git a/SourceGen/SGTestData/Expected/20120-char-encoding-a_cc65.cfg b/SourceGen/SGTestData/Expected/20120-char-encoding-a_cc65.cfg index dc2f1ea..bbb91e4 100644 --- a/SourceGen/SGTestData/Expected/20120-char-encoding-a_cc65.cfg +++ b/SourceGen/SGTestData/Expected/20120-char-encoding-a_cc65.cfg @@ -1,7 +1,7 @@ # 6502bench SourceGen generated linker script for 20120-char-encoding-a MEMORY { MAIN: file=%O, start=%S, size=65536; -# MEM000: file=%O, start=$1000, size=1244; +# MEM000: file=%O, start=$1000, size=1299; } SEGMENTS { CODE: load=MAIN, type=rw; diff --git a/SourceGen/SGTestData/Expected/20120-char-encoding-a_merlin32.S b/SourceGen/SGTestData/Expected/20120-char-encoding-a_merlin32.S index bc3729a..50b0d1f 100644 --- a/SourceGen/SGTestData/Expected/20120-char-encoding-a_merlin32.S +++ b/SourceGen/SGTestData/Expected/20120-char-encoding-a_merlin32.S @@ -214,5 +214,35 @@ L144B nop dfb $80 :L14DA nop + jmp :L1511 + + dfb $86 + dci 'Hello' + dci 'HI' + dci 'X' + dci '!' + dfb $86 + dci "Hello" + dci "HI" + dfb 'X' + dfb '!' 
+ dfb $86 + hex 48454c4ccf + hex 48c9 + hex d8 + hex a1 + dfb $86 + hex c8c5cccc4f + hex c849 + dfb $58 + dfb $21 + dfb $86 + hex 48050c0c8f + hex 48c9 + hex d8 + hex a1 + dfb $86 + +:L1511 nop rts diff --git a/SourceGen/SGTestData/Expected/20130-char-encoding-p_64tass.S b/SourceGen/SGTestData/Expected/20130-char-encoding-p_64tass.S index 38bc5b1..a143597 100644 --- a/SourceGen/SGTestData/Expected/20130-char-encoding-p_64tass.S +++ b/SourceGen/SGTestData/Expected/20130-char-encoding-p_64tass.S @@ -319,22 +319,65 @@ L144B nop .byte $5c .byte 't' .byte $80 - .text "scr ",$05 - .byte $13 - .byte $03 - .byte $01 - .byte $10 - .byte $05 - .byte $13 - .text " ",$1c,": ",$1c,$1c," ",$1c,$12," ",$1c - .byte $0e - .text " ",$1c,$22," ",$1c,"' ",$1c - .byte $14 + .enc "screen" + .text "SCR escapes " + .byte $1c + .text ": " + .byte $1c + .byte $1c + .byte ' ' + .byte $1c + .text "r " + .byte $1c + .text "n " + .byte $1c + .byte $22 + .byte ' ' + .byte $1c + .byte $27 + .byte ' ' + .byte $1c + .byte 't' .byte $80 .enc "sg_hiascii" .text "HIGH escapes \: \\ \r \n \0 \' \t" .byte $80 _L14DA nop + jmp _L1511 + + .byte $86 + .enc "sg_ascii" + .shift "Hello" + .shift "HI" + .shift "X" + .shift "!" + .byte $86 + .enc "sg_hiascii" + .byte $c8,$e5,$ec,$ec,$6f + .byte $c8,$49 + .enc "sg_ascii" + .byte 'X' + .byte '!' + .byte $86 + .enc "none" + .shift "hello" + .shift "hi" + .shift "x" + .shift "!" + .byte $86 + .byte $c8,$c5,$cc,$cc,$4f + .byte $c8,$49 + .byte 'x' + .byte '!' + .byte $86 + .enc "screen" + .shift "Hello" + .shift "HI" + .shift "X" + .shift "!" 
+ .byte $86 + +_L1511 nop rts diff --git a/SourceGen/SGTestData/Expected/20130-char-encoding-p_acme.S b/SourceGen/SGTestData/Expected/20130-char-encoding-p_acme.S index dc859b1..ceecb1b 100644 --- a/SourceGen/SGTestData/Expected/20130-char-encoding-p_acme.S +++ b/SourceGen/SGTestData/Expected/20130-char-encoding-p_acme.S @@ -288,16 +288,23 @@ L144B nop !byte $5c !byte $54 !byte $80 - !pet "scr ",$05 - !byte $13 - !byte $03 - !byte $01 - !byte $10 - !byte $05 - !byte $13 - !pet " ",$1c,": ",$1c,$1c," ",$1c,$12," ",$1c - !byte $0e - !pet " ",$1c,$22," ",$1c,"' ",$1c + !scr "SCR escapes " + !byte $1c + !scr ": " + !byte $1c + !byte $1c + !byte $20 + !byte $1c + !scr "r " + !byte $1c + !scr "n " + !byte $1c + !byte $22 + !byte $20 + !byte $1c + !byte $27 + !byte $20 + !byte $1c !byte $14 !byte $80 !xor $80 { @@ -306,5 +313,35 @@ L144B nop !byte $80 @L14DA nop + jmp @L1511 + + !byte $86 + !text "Hell",$ef + !text "H",$c9 + !text $d8 + !text $a1 + !byte $86 + !hex c8e5ecec6f + !hex c849 + !byte 'X' + !byte '!' 
+ !byte $86 + !pet "hellO" + !pet "hI" + !pet "X" + !pet $a1 + !byte $86 + !pet "HELLo" + !pet "Hi" + !byte $58 + !byte $21 + !byte $86 + !scr "Hell",$8f + !scr "H",$c9 + !scr $d8 + !scr $a1 + !byte $86 + +@L1511 nop rts diff --git a/SourceGen/SGTestData/Expected/20130-char-encoding-p_cc65.S b/SourceGen/SGTestData/Expected/20130-char-encoding-p_cc65.S index dcc44a5..29d3bc5 100644 --- a/SourceGen/SGTestData/Expected/20130-char-encoding-p_cc65.S +++ b/SourceGen/SGTestData/Expected/20130-char-encoding-p_cc65.S @@ -317,21 +317,58 @@ L144B: nop .byte $5c .byte $54 .byte $80 - .byte $53,$43,$52,$20,$05 - .byte $13 - .byte $03 - .byte $01 - .byte $10 - .byte $05 - .byte $13 - .byte $20,$1c,$3a,$20,$1c,$1c,$20,$1c,$12,$20,$1c - .byte $0e - .byte $20,$1c,$22,$20,$1c,$27,$20,$1c + .byte $53,$43,$52,$20,$05,$13,$03,$01,$10,$05,$13,$20 + .byte $1c + .byte $3a,$20 + .byte $1c + .byte $1c + .byte $20 + .byte $1c + .byte $12,$20 + .byte $1c + .byte $0e,$20 + .byte $1c + .byte $22 + .byte $20 + .byte $1c + .byte $27 + .byte $20 + .byte $1c .byte $14 .byte $80 HiAscii "HIGH escapes \: \\ \r \n \0 \' \t" .byte $80 @L14DA: nop + jmp @L1511 + + .byte $86 + .byte "Hell",$ef + .byte "H",$c9 + .byte $d8 + .byte $a1 + .byte $86 + .byte $c8,$e5,$ec,$ec,$6f + .byte $c8,$49 + .byte 'X' + .byte '!' 
+ .byte $86 + .byte $48,$45,$4c,$4c,$cf + .byte $48,$c9 + .byte $d8 + .byte $a1 + .byte $86 + .byte $c8,$c5,$cc,$cc,$4f + .byte $c8,$49 + .byte $58 + .byte $21 + .byte $86 + .byte $48,$05,$0c,$0c,$8f + .byte $48,$c9 + .byte $d8 + .byte $a1 + .byte $86 + +@L1511: nop rts diff --git a/SourceGen/SGTestData/Expected/20130-char-encoding-p_cc65.cfg b/SourceGen/SGTestData/Expected/20130-char-encoding-p_cc65.cfg index cae7f95..e8dbd15 100644 --- a/SourceGen/SGTestData/Expected/20130-char-encoding-p_cc65.cfg +++ b/SourceGen/SGTestData/Expected/20130-char-encoding-p_cc65.cfg @@ -1,7 +1,7 @@ # 6502bench SourceGen generated linker script for 20130-char-encoding-p MEMORY { MAIN: file=%O, start=%S, size=65536; -# MEM000: file=%O, start=$1000, size=1244; +# MEM000: file=%O, start=$1000, size=1299; } SEGMENTS { CODE: load=MAIN, type=rw; diff --git a/SourceGen/SGTestData/Expected/20130-char-encoding-p_merlin32.S b/SourceGen/SGTestData/Expected/20130-char-encoding-p_merlin32.S index 0298e4f..3fdcc90 100644 --- a/SourceGen/SGTestData/Expected/20130-char-encoding-p_merlin32.S +++ b/SourceGen/SGTestData/Expected/20130-char-encoding-p_merlin32.S @@ -285,21 +285,58 @@ L144B nop dfb $5c dfb $54 dfb $80 - hex 5343522005 - dfb $13 - dfb $03 - dfb $01 - dfb $10 - dfb $05 - dfb $13 - hex 201c3a201c1c201c12201c - dfb $0e - hex 201c22201c27201c + hex 534352200513030110051320 + dfb $1c + hex 3a20 + dfb $1c + dfb $1c + dfb $20 + dfb $1c + hex 1220 + dfb $1c + hex 0e20 + dfb $1c + dfb $22 + dfb $20 + dfb $1c + dfb $27 + dfb $20 + dfb $1c dfb $14 dfb $80 asc "HIGH escapes \: \\ \r \n \0 \' \t" dfb $80 :L14DA nop + jmp :L1511 + + dfb $86 + dci 'Hello' + dci 'HI' + dci 'X' + dci '!' + dfb $86 + dci "Hello" + dci "HI" + dfb 'X' + dfb '!' 
+ dfb $86 + hex 48454c4ccf + hex 48c9 + hex d8 + hex a1 + dfb $86 + hex c8c5cccc4f + hex c849 + dfb $58 + dfb $21 + dfb $86 + hex 48050c0c8f + hex 48c9 + hex d8 + hex a1 + dfb $86 + +:L1511 nop rts diff --git a/SourceGen/SGTestData/Expected/20140-char-encoding-s_64tass.S b/SourceGen/SGTestData/Expected/20140-char-encoding-s_64tass.S index 8953808..2bede8c 100644 --- a/SourceGen/SGTestData/Expected/20140-char-encoding-s_64tass.S +++ b/SourceGen/SGTestData/Expected/20140-char-encoding-s_64tass.S @@ -352,5 +352,40 @@ L144B nop .byte $80 _L14DA nop + jmp _L1511 + + .byte $86 + .enc "sg_ascii" + .shift "Hello" + .shift "HI" + .shift "X" + .shift "!" + .byte $86 + .enc "sg_hiascii" + .byte $c8,$e5,$ec,$ec,$6f + .byte $c8,$49 + .enc "sg_ascii" + .byte 'X' + .byte '!' + .byte $86 + .enc "none" + .shift "hello" + .shift "hi" + .shift "x" + .shift "!" + .byte $86 + .byte $c8,$c5,$cc,$cc,$4f + .byte $c8,$49 + .byte 'x' + .byte '!' + .byte $86 + .enc "screen" + .shift "Hello" + .shift "HI" + .shift "X" + .shift "!" + .byte $86 + +_L1511 nop rts diff --git a/SourceGen/SGTestData/Expected/20140-char-encoding-s_acme.S b/SourceGen/SGTestData/Expected/20140-char-encoding-s_acme.S index 722aea7..d2774ac 100644 --- a/SourceGen/SGTestData/Expected/20140-char-encoding-s_acme.S +++ b/SourceGen/SGTestData/Expected/20140-char-encoding-s_acme.S @@ -322,5 +322,35 @@ L144B nop !byte $80 @L14DA nop + jmp @L1511 + + !byte $86 + !text "Hell",$ef + !text "H",$c9 + !text $d8 + !text $a1 + !byte $86 + !hex c8e5ecec6f + !hex c849 + !byte 'X' + !byte '!' 
+ !byte $86 + !pet "hellO" + !pet "hI" + !pet "X" + !pet $a1 + !byte $86 + !pet "HELLo" + !pet "Hi" + !byte $58 + !byte $21 + !byte $86 + !scr "Hell",$8f + !scr "H",$c9 + !scr $d8 + !scr $a1 + !byte $86 + +@L1511 nop rts diff --git a/SourceGen/SGTestData/Expected/20140-char-encoding-s_cc65.S b/SourceGen/SGTestData/Expected/20140-char-encoding-s_cc65.S index d0259a0..8ea10bd 100644 --- a/SourceGen/SGTestData/Expected/20140-char-encoding-s_cc65.S +++ b/SourceGen/SGTestData/Expected/20140-char-encoding-s_cc65.S @@ -349,5 +349,35 @@ L144B: nop .byte $80 @L14DA: nop + jmp @L1511 + + .byte $86 + .byte "Hell",$ef + .byte "H",$c9 + .byte $d8 + .byte $a1 + .byte $86 + .byte $c8,$e5,$ec,$ec,$6f + .byte $c8,$49 + .byte 'X' + .byte '!' + .byte $86 + .byte $48,$45,$4c,$4c,$cf + .byte $48,$c9 + .byte $d8 + .byte $a1 + .byte $86 + .byte $c8,$c5,$cc,$cc,$4f + .byte $c8,$49 + .byte $58 + .byte $21 + .byte $86 + .byte $48,$05,$0c,$0c,$8f + .byte $48,$c9 + .byte $d8 + .byte $a1 + .byte $86 + +@L1511: nop rts diff --git a/SourceGen/SGTestData/Expected/20140-char-encoding-s_cc65.cfg b/SourceGen/SGTestData/Expected/20140-char-encoding-s_cc65.cfg index 97bdeda..7e3b9d0 100644 --- a/SourceGen/SGTestData/Expected/20140-char-encoding-s_cc65.cfg +++ b/SourceGen/SGTestData/Expected/20140-char-encoding-s_cc65.cfg @@ -1,7 +1,7 @@ # 6502bench SourceGen generated linker script for 20140-char-encoding-s MEMORY { MAIN: file=%O, start=%S, size=65536; -# MEM000: file=%O, start=$1000, size=1244; +# MEM000: file=%O, start=$1000, size=1299; } SEGMENTS { CODE: load=MAIN, type=rw; diff --git a/SourceGen/SGTestData/Expected/20140-char-encoding-s_merlin32.S b/SourceGen/SGTestData/Expected/20140-char-encoding-s_merlin32.S index b446638..3b93e5e 100644 --- a/SourceGen/SGTestData/Expected/20140-char-encoding-s_merlin32.S +++ b/SourceGen/SGTestData/Expected/20140-char-encoding-s_merlin32.S @@ -317,5 +317,35 @@ L144B nop dfb $80 :L14DA nop + jmp :L1511 + + dfb $86 + dci 'Hello' + dci 'HI' + dci 'X' + dci '!' 
+ dfb $86 + dci "Hello" + dci "HI" + dfb 'X' + dfb '!' + dfb $86 + hex 48454c4ccf + hex 48c9 + hex d8 + hex a1 + dfb $86 + hex c8c5cccc4f + hex c849 + dfb $58 + dfb $21 + dfb $86 + hex 48050c0c8f + hex 48c9 + hex d8 + hex a1 + dfb $86 + +:L1511 nop rts diff --git a/SourceGen/SGTestData/Source/20120-char-encoding.S b/SourceGen/SGTestData/Source/20120-char-encoding.S index 3f7d047..c5dbe7e 100644 --- a/SourceGen/SGTestData/Source/20120-char-encoding.S +++ b/SourceGen/SGTestData/Source/20120-char-encoding.S @@ -1,7 +1,9 @@ ; Copyright 2019 faddenSoft. All Rights Reserved. ; See the LICENSE.txt file for distribution terms (Apache 2.0). ; -; Assembler: ACME (has good PETSCII/screen code support) +; Assembler: ACME v0.96.4 (has good PETSCII/screen code support) +; (acme -o 20120-char-encoding 20120-char-encoding.S) +; ; 6502 version ; ; This becomes multiple tests that differ only in how the default @@ -145,8 +147,10 @@ allbytes !hex f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff end nop - jmp skip_bs + jmp skip_esc +; String literals with backslash escapes. Necessary because ACME v0.97 +; introduced the feature and broke compatibility with previous versions. !text "ASCII escapes \: \\ \r \n \",$22," \",$27," \t" !byte $80 !pet "PETSCII escapes \: \\ \r \n \",$22," \",$27," \t" @@ -159,5 +163,46 @@ end nop } !byte $80 -skip_bs nop +skip_esc nop + jmp skip_dci + +; Small batches of DCI strings, for testing single-character strings. +; EDIT: format all of these as DCI strings + !byte $86 + + !text "Hell",$ef ;Hello + !text "H",$c9 ;Hi + !byte $d8 ;X + !byte $a1 ;! + !byte $86 + + !xor $80 { + !text "Hell",$ef ;Hello + !text "H",$c9 ;Hi + !byte $d8 ;X + !byte $a1 ;! 
+ } + !byte $86 + + !pet "hellO" + !pet "hI" + !byte $d8 + !byte $a1 + !byte $86 + + !xor $80 { ;hi then lo; probably not necessary + !pet "hellO" ;(all C64 DCI seems to be lo-then-hi) + !pet "hI" + !byte $d8 + !byte $a1 + } + !byte $86 + + !scr "Hell",$8f ;just do lo-then-hi for screen codes + !scr "H",$c9 + !byte $d8 + !byte $a1 + !byte $86 + +skip_dci nop rts diff --git a/SourceGen/WpfGui/EditDataOperand.xaml.cs b/SourceGen/WpfGui/EditDataOperand.xaml.cs index 72156e5..55148e2 100644 --- a/SourceGen/WpfGui/EditDataOperand.xaml.cs +++ b/SourceGen/WpfGui/EditDataOperand.xaml.cs @@ -1283,9 +1283,9 @@ namespace SourceGen.WpfGui { } private FormatDescriptor.SubType ResolveAsciiGeneric(int offset, - FormatDescriptor.SubType subType) { + FormatDescriptor.SubType subType, byte dciAdjust = 0x00) { if (subType == FormatDescriptor.SubType.ASCII_GENERIC) { - if ((mFileData[offset] & 0x80) != 0) { + if (((mFileData[offset] & 0x80) ^ dciAdjust) != 0) { subType = FormatDescriptor.SubType.HighAscii; } else { subType = FormatDescriptor.SubType.Ascii; @@ -1406,8 +1406,16 @@ namespace SourceGen.WpfGui { if ((val & 0x80) == endMask) { // found the end of a string int length = (i - stringStart) + 1; + // High vs. low ASCII can't look at the first byte, in case it's a 1-byte + // string. We need to look at the last byte and flip the sense. (It's + // slightly easier to pass the first byte as usual, and flip it for a 1-byte + // string.) + byte dciAdjust = 0x00; + if (length == 1) { + dciAdjust = 0x80; + } FormatDescriptor dfd = FormatDescriptor.Create(length, type, - ResolveAsciiGeneric(stringStart, subType)); + ResolveAsciiGeneric(stringStart, subType, dciAdjust)); Results.Add(stringStart, dfd); stringStart = i + 1; }