1
0
mirror of https://github.com/fadden/6502bench.git synced 2024-05-31 22:41:37 +00:00

Allow single-character DCI strings

The DCI string format uses character values where the high bit of the
last byte differs from the rest of the string.  Usually all the high
bits are clear except on the last byte, but SourceGen generally allows
either polarity.

This gets a little uncertain with single-character strings, because
SourceGen can't auto-detect DCI very effectively.  A series of bytes
with the high bit set could be a single high-ASCII string or a series
of single-byte DCI strings.

The motivation for allowing them is C64 PETSCII.  While ASCII allows
"high ASCII" as an escape hatch, PETSCII doesn't have that option, so
there's no way to mark the data as a character or a string.  We still
want to do a bit of screening, but if the user specifies a non-ASCII
character set and the selected bytes have their high bits set, we
want to just treat the whole set as 1-byte DCI.

Some minor adjustments were needed for a couple of validity checks
that expected longer strings.

This adds some short DCI strings in different character sets to the
char-encoding regression tests.

(for issue #102)
This commit is contained in:
Andy McFadden 2021-08-08 15:38:39 -07:00
parent b58fdedfcd
commit 3368182e14
25 changed files with 1008 additions and 73 deletions

View File

@ -797,7 +797,7 @@ namespace SourceGen.AsmGen {
break;
case FormatDescriptor.Type.StringDci:
opcodeStr = sDataOpNames.StrDci;
if ((Project.FileData[offset] & 0x80) != 0) {
if ((Project.FileData[offset + dfd.Length - 1] & 0x80) == 0) {
// ".shift" directive only works for strings where the high bit starts
// clear and ends set.
OutputNoJoy(offset, dfd.Length, labelStr, commentStr);

View File

@ -1187,8 +1187,13 @@ namespace SourceGen {
/// Counts strings in Dextral Character Inverted format, meaning the high bit on the
/// last byte is the opposite of the preceding.
///
/// Each string must be at least two bytes. To reduce false-positives, we require
/// that all strings have the same hi/lo pattern.
/// To reduce false-positives, we require that all strings have the same hi/lo pattern.
///
/// Single-character strings are allowed for C64 PETSCII, which doesn't have an
/// equivalent to "high ASCII" character formatting, so long as the terminating
/// character value has its high bit set. Without this restriction, any collection
/// of characters is just a list of DCI strings, which is a weird thing to offer up
/// in the UI.
/// </summary>
/// <remarks>
/// For C64Petscii, this will identify strings that are entirely in lower case except
@ -1201,22 +1206,21 @@ namespace SourceGen {
/// <returns>Number of strings found, or -1 if bad data identified.</returns>
public static int RecognizeDciStrings(byte[] fileData, int start, int end,
CharEncoding.InclusionTest charTest) {
int expectedHiBit = fileData[start] & 0x80;
int endHiBit = fileData[end] & 0x80;
int stringCount = 0;
int stringLen = 0;
// Quick test on last byte.
if ((fileData[end] & 0x80) == expectedHiBit) {
return -1;
}
for (int i = start; i <= end; i++) {
byte val = fileData[i];
if ((val & 0x80) != expectedHiBit) {
if ((val & 0x80) == endHiBit) {
// end of string
if (stringLen == 0) {
// Got two consecutive bytes with end-marker polarity... fail.
return -1;
// Got two consecutive bytes with end-marker polarity. Allow if the
// end char high bit is set. Otherwise it's just a sequence of
// regular characters.
if (endHiBit == 0) {
return -1;
}
}
stringCount++;
stringLen = 0;
@ -1230,6 +1234,12 @@ namespace SourceGen {
//}
}
bool isAscii = charTest(0x5c); // temporary hack
if (isAscii && stringCount == end - start + 1) {
// Entire region is single-character strings. Don't allow for ASCII or
// high ASCII.
return -1;
}
return stringCount;
}
@ -1330,10 +1340,10 @@ namespace SourceGen {
}
return true;
case FormatDescriptor.Type.StringDci:
if (length < 2) {
failMsg = Res.Strings.STR_VFY_DCI_SHORT;
return false;
}
//if (length < 2) {
// failMsg = Res.Strings.STR_VFY_DCI_SHORT;
// return false;
//}
byte first = (byte)(fileData[offset] & 0x80);
for (int i = offset + 1; i < offset + length - 1; i++) {
if ((fileData[i] & 0x80) != first) {
@ -1341,7 +1351,7 @@ namespace SourceGen {
return false;
}
}
if ((fileData[offset + length - 1] & 0x80) == first) {
if (length > 1 && (fileData[offset + length - 1] & 0x80) == first) {
failMsg = Res.Strings.STR_VFY_DCI_NOT_TERMINATED;
return false;
}

View File

@ -1,8 +1,8 @@
### 6502bench SourceGen dis65 v1.0 ###
{
"_ContentVersion":4,
"FileDataLength":1244,
"FileDataCrc32":220973587,
"FileDataLength":1299,
"FileDataCrc32":-371479809,
"ProjectProps":{
"CpuName":"6502",
"IncludeUndocumentedInstr":false,
@ -666,6 +666,126 @@
"Length":33,
"Format":"StringGeneric",
"SubFormat":"HighAscii",
"SymbolRef":null},
"1247":{
"Length":5,
"Format":"StringDci",
"SubFormat":"Ascii",
"SymbolRef":null},
"1252":{
"Length":2,
"Format":"StringDci",
"SubFormat":"Ascii",
"SymbolRef":null},
"1254":{
"Length":1,
"Format":"StringDci",
"SubFormat":"Ascii",
"SymbolRef":null},
"1255":{
"Length":1,
"Format":"StringDci",
"SubFormat":"Ascii",
"SymbolRef":null},
"1257":{
"Length":5,
"Format":"StringDci",
"SubFormat":"HighAscii",
"SymbolRef":null},
"1262":{
"Length":2,
"Format":"StringDci",
"SubFormat":"HighAscii",
"SymbolRef":null},
"1264":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"Ascii",
"SymbolRef":null},
"1265":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"Ascii",
"SymbolRef":null},
"1267":{
"Length":5,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1272":{
"Length":2,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1274":{
"Length":1,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1275":{
"Length":1,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1277":{
"Length":5,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1282":{
"Length":2,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1284":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1285":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1287":{
"Length":5,
"Format":"StringDci",
"SubFormat":"C64Screen",
"SymbolRef":null},
"1292":{
"Length":2,
"Format":"StringDci",
"SubFormat":"C64Screen",
"SymbolRef":null},
"1294":{
"Length":1,
"Format":"StringDci",
"SubFormat":"C64Screen",
"SymbolRef":null},
"1295":{
"Length":1,
"Format":"StringDci",
"SubFormat":"C64Screen",
"SymbolRef":null}},
"LvTables":{

View File

@ -1,8 +1,8 @@
### 6502bench SourceGen dis65 v1.0 ###
{
"_ContentVersion":4,
"FileDataLength":1244,
"FileDataCrc32":220973587,
"FileDataLength":1299,
"FileDataCrc32":-371479809,
"ProjectProps":{
"CpuName":"6502",
"IncludeUndocumentedInstr":false,
@ -554,10 +554,238 @@
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1175":{
"Length":12,
"Format":"StringGeneric",
"SubFormat":"C64Screen",
"SymbolRef":null},
"1187":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"Hex",
"SymbolRef":null},
"1188":{
"Length":2,
"Format":"StringGeneric",
"SubFormat":"C64Screen",
"SymbolRef":null},
"1190":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"Hex",
"SymbolRef":null},
"1191":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"Hex",
"SymbolRef":null},
"1192":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"C64Screen",
"SymbolRef":null},
"1193":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"Hex",
"SymbolRef":null},
"1194":{
"Length":2,
"Format":"StringGeneric",
"SubFormat":"C64Screen",
"SymbolRef":null},
"1196":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"Hex",
"SymbolRef":null},
"1197":{
"Length":2,
"Format":"StringGeneric",
"SubFormat":"C64Screen",
"SymbolRef":null},
"1199":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"Hex",
"SymbolRef":null},
"1200":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"Hex",
"SymbolRef":null},
"1201":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"C64Screen",
"SymbolRef":null},
"1202":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"Hex",
"SymbolRef":null},
"1203":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"Hex",
"SymbolRef":null},
"1204":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"C64Screen",
"SymbolRef":null},
"1205":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"Hex",
"SymbolRef":null},
"1206":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"C64Screen",
"SymbolRef":null},
"1208":{
"Length":33,
"Format":"StringGeneric",
"SubFormat":"HighAscii",
"SymbolRef":null},
"1247":{
"Length":5,
"Format":"StringDci",
"SubFormat":"Ascii",
"SymbolRef":null},
"1252":{
"Length":2,
"Format":"StringDci",
"SubFormat":"Ascii",
"SymbolRef":null},
"1254":{
"Length":1,
"Format":"StringDci",
"SubFormat":"Ascii",
"SymbolRef":null},
"1255":{
"Length":1,
"Format":"StringDci",
"SubFormat":"Ascii",
"SymbolRef":null},
"1257":{
"Length":5,
"Format":"StringDci",
"SubFormat":"HighAscii",
"SymbolRef":null},
"1262":{
"Length":2,
"Format":"StringDci",
"SubFormat":"HighAscii",
"SymbolRef":null},
"1264":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"Ascii",
"SymbolRef":null},
"1265":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"Ascii",
"SymbolRef":null},
"1267":{
"Length":5,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1272":{
"Length":2,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1274":{
"Length":1,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1275":{
"Length":1,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1277":{
"Length":5,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1282":{
"Length":2,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1284":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1285":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1287":{
"Length":5,
"Format":"StringDci",
"SubFormat":"C64Screen",
"SymbolRef":null},
"1292":{
"Length":2,
"Format":"StringDci",
"SubFormat":"C64Screen",
"SymbolRef":null},
"1294":{
"Length":1,
"Format":"StringDci",
"SubFormat":"C64Screen",
"SymbolRef":null},
"1295":{
"Length":1,
"Format":"StringDci",
"SubFormat":"C64Screen",
"SymbolRef":null}},
"LvTables":{

View File

@ -1,8 +1,8 @@
### 6502bench SourceGen dis65 v1.0 ###
{
"_ContentVersion":4,
"FileDataLength":1244,
"FileDataCrc32":220973587,
"FileDataLength":1299,
"FileDataCrc32":-371479809,
"ProjectProps":{
"CpuName":"6502",
"IncludeUndocumentedInstr":false,
@ -666,6 +666,126 @@
"Length":33,
"Format":"StringGeneric",
"SubFormat":"HighAscii",
"SymbolRef":null},
"1247":{
"Length":5,
"Format":"StringDci",
"SubFormat":"Ascii",
"SymbolRef":null},
"1252":{
"Length":2,
"Format":"StringDci",
"SubFormat":"Ascii",
"SymbolRef":null},
"1254":{
"Length":1,
"Format":"StringDci",
"SubFormat":"Ascii",
"SymbolRef":null},
"1255":{
"Length":1,
"Format":"StringDci",
"SubFormat":"Ascii",
"SymbolRef":null},
"1257":{
"Length":5,
"Format":"StringDci",
"SubFormat":"HighAscii",
"SymbolRef":null},
"1262":{
"Length":2,
"Format":"StringDci",
"SubFormat":"HighAscii",
"SymbolRef":null},
"1264":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"Ascii",
"SymbolRef":null},
"1265":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"Ascii",
"SymbolRef":null},
"1267":{
"Length":5,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1272":{
"Length":2,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1274":{
"Length":1,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1275":{
"Length":1,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1277":{
"Length":5,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1282":{
"Length":2,
"Format":"StringDci",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1284":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1285":{
"Length":1,
"Format":"NumericLE",
"SubFormat":"C64Petscii",
"SymbolRef":null},
"1287":{
"Length":5,
"Format":"StringDci",
"SubFormat":"C64Screen",
"SymbolRef":null},
"1292":{
"Length":2,
"Format":"StringDci",
"SubFormat":"C64Screen",
"SymbolRef":null},
"1294":{
"Length":1,
"Format":"StringDci",
"SubFormat":"C64Screen",
"SymbolRef":null},
"1295":{
"Length":1,
"Format":"StringDci",
"SubFormat":"C64Screen",
"SymbolRef":null}},
"LvTables":{

View File

@ -251,5 +251,40 @@ L144B nop
.byte $80
_L14DA nop
jmp _L1511
.byte $86
.enc "sg_ascii"
.shift "Hello"
.shift "HI"
.shift "X"
.shift "!"
.byte $86
.enc "sg_hiascii"
.byte $c8,$e5,$ec,$ec,$6f
.byte $c8,$49
.enc "sg_ascii"
.byte 'X'
.byte '!'
.byte $86
.enc "none"
.shift "hello"
.shift "hi"
.shift "x"
.shift "!"
.byte $86
.byte $c8,$c5,$cc,$cc,$4f
.byte $c8,$49
.byte 'x'
.byte '!'
.byte $86
.enc "screen"
.shift "Hello"
.shift "HI"
.shift "X"
.shift "!"
.byte $86
_L1511 nop
rts

View File

@ -220,5 +220,35 @@ L144B nop
!byte $80
@L14DA nop
jmp @L1511
!byte $86
!text "Hell",$ef
!text "H",$c9
!text $d8
!text $a1
!byte $86
!hex c8e5ecec6f
!hex c849
!byte 'X'
!byte '!'
!byte $86
!pet "hellO"
!pet "hI"
!pet "X"
!pet $a1
!byte $86
!pet "HELLo"
!pet "Hi"
!byte $58
!byte $21
!byte $86
!scr "Hell",$8f
!scr "H",$c9
!scr $d8
!scr $a1
!byte $86
@L1511 nop
rts

View File

@ -247,5 +247,35 @@ L144B: nop
.byte $80
@L14DA: nop
jmp @L1511
.byte $86
.byte "Hell",$ef
.byte "H",$c9
.byte $d8
.byte $a1
.byte $86
.byte $c8,$e5,$ec,$ec,$6f
.byte $c8,$49
.byte 'X'
.byte '!'
.byte $86
.byte $48,$45,$4c,$4c,$cf
.byte $48,$c9
.byte $d8
.byte $a1
.byte $86
.byte $c8,$c5,$cc,$cc,$4f
.byte $c8,$49
.byte $58
.byte $21
.byte $86
.byte $48,$05,$0c,$0c,$8f
.byte $48,$c9
.byte $d8
.byte $a1
.byte $86
@L1511: nop
rts

View File

@ -1,7 +1,7 @@
# 6502bench SourceGen generated linker script for 20120-char-encoding-a
MEMORY {
MAIN: file=%O, start=%S, size=65536;
# MEM000: file=%O, start=$1000, size=1244;
# MEM000: file=%O, start=$1000, size=1299;
}
SEGMENTS {
CODE: load=MAIN, type=rw;

View File

@ -214,5 +214,35 @@ L144B nop
dfb $80
:L14DA nop
jmp :L1511
dfb $86
dci 'Hello'
dci 'HI'
dci 'X'
dci '!'
dfb $86
dci "Hello"
dci "HI"
dfb 'X'
dfb '!'
dfb $86
hex 48454c4ccf
hex 48c9
hex d8
hex a1
dfb $86
hex c8c5cccc4f
hex c849
dfb $58
dfb $21
dfb $86
hex 48050c0c8f
hex 48c9
hex d8
hex a1
dfb $86
:L1511 nop
rts

View File

@ -319,22 +319,65 @@ L144B nop
.byte $5c
.byte 't'
.byte $80
.text "scr ",$05
.byte $13
.byte $03
.byte $01
.byte $10
.byte $05
.byte $13
.text " ",$1c,": ",$1c,$1c," ",$1c,$12," ",$1c
.byte $0e
.text " ",$1c,$22," ",$1c,"' ",$1c
.byte $14
.enc "screen"
.text "SCR escapes "
.byte $1c
.text ": "
.byte $1c
.byte $1c
.byte ' '
.byte $1c
.text "r "
.byte $1c
.text "n "
.byte $1c
.byte $22
.byte ' '
.byte $1c
.byte $27
.byte ' '
.byte $1c
.byte 't'
.byte $80
.enc "sg_hiascii"
.text "HIGH escapes \: \\ \r \n \0 \' \t"
.byte $80
_L14DA nop
jmp _L1511
.byte $86
.enc "sg_ascii"
.shift "Hello"
.shift "HI"
.shift "X"
.shift "!"
.byte $86
.enc "sg_hiascii"
.byte $c8,$e5,$ec,$ec,$6f
.byte $c8,$49
.enc "sg_ascii"
.byte 'X'
.byte '!'
.byte $86
.enc "none"
.shift "hello"
.shift "hi"
.shift "x"
.shift "!"
.byte $86
.byte $c8,$c5,$cc,$cc,$4f
.byte $c8,$49
.byte 'x'
.byte '!'
.byte $86
.enc "screen"
.shift "Hello"
.shift "HI"
.shift "X"
.shift "!"
.byte $86
_L1511 nop
rts

View File

@ -288,16 +288,23 @@ L144B nop
!byte $5c
!byte $54
!byte $80
!pet "scr ",$05
!byte $13
!byte $03
!byte $01
!byte $10
!byte $05
!byte $13
!pet " ",$1c,": ",$1c,$1c," ",$1c,$12," ",$1c
!byte $0e
!pet " ",$1c,$22," ",$1c,"' ",$1c
!scr "SCR escapes "
!byte $1c
!scr ": "
!byte $1c
!byte $1c
!byte $20
!byte $1c
!scr "r "
!byte $1c
!scr "n "
!byte $1c
!byte $22
!byte $20
!byte $1c
!byte $27
!byte $20
!byte $1c
!byte $14
!byte $80
!xor $80 {
@ -306,5 +313,35 @@ L144B nop
!byte $80
@L14DA nop
jmp @L1511
!byte $86
!text "Hell",$ef
!text "H",$c9
!text $d8
!text $a1
!byte $86
!hex c8e5ecec6f
!hex c849
!byte 'X'
!byte '!'
!byte $86
!pet "hellO"
!pet "hI"
!pet "X"
!pet $a1
!byte $86
!pet "HELLo"
!pet "Hi"
!byte $58
!byte $21
!byte $86
!scr "Hell",$8f
!scr "H",$c9
!scr $d8
!scr $a1
!byte $86
@L1511 nop
rts

View File

@ -317,21 +317,58 @@ L144B: nop
.byte $5c
.byte $54
.byte $80
.byte $53,$43,$52,$20,$05
.byte $13
.byte $03
.byte $01
.byte $10
.byte $05
.byte $13
.byte $20,$1c,$3a,$20,$1c,$1c,$20,$1c,$12,$20,$1c
.byte $0e
.byte $20,$1c,$22,$20,$1c,$27,$20,$1c
.byte $53,$43,$52,$20,$05,$13,$03,$01,$10,$05,$13,$20
.byte $1c
.byte $3a,$20
.byte $1c
.byte $1c
.byte $20
.byte $1c
.byte $12,$20
.byte $1c
.byte $0e,$20
.byte $1c
.byte $22
.byte $20
.byte $1c
.byte $27
.byte $20
.byte $1c
.byte $14
.byte $80
HiAscii "HIGH escapes \: \\ \r \n \0 \' \t"
.byte $80
@L14DA: nop
jmp @L1511
.byte $86
.byte "Hell",$ef
.byte "H",$c9
.byte $d8
.byte $a1
.byte $86
.byte $c8,$e5,$ec,$ec,$6f
.byte $c8,$49
.byte 'X'
.byte '!'
.byte $86
.byte $48,$45,$4c,$4c,$cf
.byte $48,$c9
.byte $d8
.byte $a1
.byte $86
.byte $c8,$c5,$cc,$cc,$4f
.byte $c8,$49
.byte $58
.byte $21
.byte $86
.byte $48,$05,$0c,$0c,$8f
.byte $48,$c9
.byte $d8
.byte $a1
.byte $86
@L1511: nop
rts

View File

@ -1,7 +1,7 @@
# 6502bench SourceGen generated linker script for 20130-char-encoding-p
MEMORY {
MAIN: file=%O, start=%S, size=65536;
# MEM000: file=%O, start=$1000, size=1244;
# MEM000: file=%O, start=$1000, size=1299;
}
SEGMENTS {
CODE: load=MAIN, type=rw;

View File

@ -285,21 +285,58 @@ L144B nop
dfb $5c
dfb $54
dfb $80
hex 5343522005
dfb $13
dfb $03
dfb $01
dfb $10
dfb $05
dfb $13
hex 201c3a201c1c201c12201c
dfb $0e
hex 201c22201c27201c
hex 534352200513030110051320
dfb $1c
hex 3a20
dfb $1c
dfb $1c
dfb $20
dfb $1c
hex 1220
dfb $1c
hex 0e20
dfb $1c
dfb $22
dfb $20
dfb $1c
dfb $27
dfb $20
dfb $1c
dfb $14
dfb $80
asc "HIGH escapes \: \\ \r \n \0 \' \t"
dfb $80
:L14DA nop
jmp :L1511
dfb $86
dci 'Hello'
dci 'HI'
dci 'X'
dci '!'
dfb $86
dci "Hello"
dci "HI"
dfb 'X'
dfb '!'
dfb $86
hex 48454c4ccf
hex 48c9
hex d8
hex a1
dfb $86
hex c8c5cccc4f
hex c849
dfb $58
dfb $21
dfb $86
hex 48050c0c8f
hex 48c9
hex d8
hex a1
dfb $86
:L1511 nop
rts

View File

@ -352,5 +352,40 @@ L144B nop
.byte $80
_L14DA nop
jmp _L1511
.byte $86
.enc "sg_ascii"
.shift "Hello"
.shift "HI"
.shift "X"
.shift "!"
.byte $86
.enc "sg_hiascii"
.byte $c8,$e5,$ec,$ec,$6f
.byte $c8,$49
.enc "sg_ascii"
.byte 'X'
.byte '!'
.byte $86
.enc "none"
.shift "hello"
.shift "hi"
.shift "x"
.shift "!"
.byte $86
.byte $c8,$c5,$cc,$cc,$4f
.byte $c8,$49
.byte 'x'
.byte '!'
.byte $86
.enc "screen"
.shift "Hello"
.shift "HI"
.shift "X"
.shift "!"
.byte $86
_L1511 nop
rts

View File

@ -322,5 +322,35 @@ L144B nop
!byte $80
@L14DA nop
jmp @L1511
!byte $86
!text "Hell",$ef
!text "H",$c9
!text $d8
!text $a1
!byte $86
!hex c8e5ecec6f
!hex c849
!byte 'X'
!byte '!'
!byte $86
!pet "hellO"
!pet "hI"
!pet "X"
!pet $a1
!byte $86
!pet "HELLo"
!pet "Hi"
!byte $58
!byte $21
!byte $86
!scr "Hell",$8f
!scr "H",$c9
!scr $d8
!scr $a1
!byte $86
@L1511 nop
rts

View File

@ -349,5 +349,35 @@ L144B: nop
.byte $80
@L14DA: nop
jmp @L1511
.byte $86
.byte "Hell",$ef
.byte "H",$c9
.byte $d8
.byte $a1
.byte $86
.byte $c8,$e5,$ec,$ec,$6f
.byte $c8,$49
.byte 'X'
.byte '!'
.byte $86
.byte $48,$45,$4c,$4c,$cf
.byte $48,$c9
.byte $d8
.byte $a1
.byte $86
.byte $c8,$c5,$cc,$cc,$4f
.byte $c8,$49
.byte $58
.byte $21
.byte $86
.byte $48,$05,$0c,$0c,$8f
.byte $48,$c9
.byte $d8
.byte $a1
.byte $86
@L1511: nop
rts

View File

@ -1,7 +1,7 @@
# 6502bench SourceGen generated linker script for 20140-char-encoding-s
MEMORY {
MAIN: file=%O, start=%S, size=65536;
# MEM000: file=%O, start=$1000, size=1244;
# MEM000: file=%O, start=$1000, size=1299;
}
SEGMENTS {
CODE: load=MAIN, type=rw;

View File

@ -317,5 +317,35 @@ L144B nop
dfb $80
:L14DA nop
jmp :L1511
dfb $86
dci 'Hello'
dci 'HI'
dci 'X'
dci '!'
dfb $86
dci "Hello"
dci "HI"
dfb 'X'
dfb '!'
dfb $86
hex 48454c4ccf
hex 48c9
hex d8
hex a1
dfb $86
hex c8c5cccc4f
hex c849
dfb $58
dfb $21
dfb $86
hex 48050c0c8f
hex 48c9
hex d8
hex a1
dfb $86
:L1511 nop
rts

View File

@ -1,7 +1,9 @@
; Copyright 2019 faddenSoft. All Rights Reserved.
; See the LICENSE.txt file for distribution terms (Apache 2.0).
;
; Assembler: ACME (has good PETSCII/screen code support)
; Assembler: ACME v0.96.4 (has good PETSCII/screen code support)
; (acme -o 20120-char-encoding 20120-char-encoding.S)
;
; 6502 version
;
; This becomes multiple tests that differ only in how the default
@ -145,8 +147,10 @@ allbytes
!hex f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff
end nop
jmp skip_bs
jmp skip_esc
; String literals with backslash escapes. Necessary because ACME v0.97
; introduced the feature and broke compatibility with previous versions.
!text "ASCII escapes \: \\ \r \n \",$22," \",$27," \t"
!byte $80
!pet "PETSCII escapes \: \\ \r \n \",$22," \",$27," \t"
@ -159,5 +163,46 @@ end nop
}
!byte $80
skip_bs nop
skip_esc nop
jmp skip_dci
; Small batches of DCI strings, for testing single-character strings.
; EDIT: format all of these as DCI strings
!byte $86
!text "Hell",$ef ;Hello
!text "H",$c9 ;Hi
!byte $d8 ;X
!byte $a1 ;!
!byte $86
!xor $80 {
!text "Hell",$ef ;Hello
!text "H",$c9 ;Hi
!byte $d8 ;X
!byte $a1 ;!
}
!byte $86
!pet "hellO"
!pet "hI"
!byte $d8
!byte $a1
!byte $86
!xor $80 { ;hi then lo; probably not necessary
!pet "hellO" ;(all C64 DCI seems to be lo-then-hi)
!pet "hI"
!byte $d8
!byte $a1
}
!byte $86
!scr "Hell",$8f ;just do lo-then-hi for screen codes
!scr "H",$c9
!byte $d8
!byte $a1
!byte $86
skip_dci nop
rts

View File

@ -1283,9 +1283,9 @@ namespace SourceGen.WpfGui {
}
private FormatDescriptor.SubType ResolveAsciiGeneric(int offset,
FormatDescriptor.SubType subType) {
FormatDescriptor.SubType subType, byte dciAdjust = 0x00) {
if (subType == FormatDescriptor.SubType.ASCII_GENERIC) {
if ((mFileData[offset] & 0x80) != 0) {
if (((mFileData[offset] & 0x80) ^ dciAdjust) != 0) {
subType = FormatDescriptor.SubType.HighAscii;
} else {
subType = FormatDescriptor.SubType.Ascii;
@ -1406,8 +1406,16 @@ namespace SourceGen.WpfGui {
if ((val & 0x80) == endMask) {
// found the end of a string
int length = (i - stringStart) + 1;
// The high vs. low ASCII decision can't simply use the first byte, because in
// a 1-byte string that byte is also the terminator, whose high bit is inverted.
// We really want to look at the last byte and flip the sense. (It's slightly
// easier to pass the first byte as usual, and flip it only for a 1-byte string.)
byte dciAdjust = 0x00;
if (length == 1) {
dciAdjust = 0x80;
}
FormatDescriptor dfd = FormatDescriptor.Create(length, type,
ResolveAsciiGeneric(stringStart, subType));
ResolveAsciiGeneric(stringStart, subType, dciAdjust));
Results.Add(stringStart, dfd);
stringStart = i + 1;
}