From 635084db9d218949b4a0c2a67a240177b07dfb1f Mon Sep 17 00:00:00 2001 From: Andy McFadden Date: Tue, 10 Aug 2021 14:08:39 -0700 Subject: [PATCH] Fix DCI string edge case If a DCI string ended with a string delimiter or non-ASCII character (e.g. a PETSCII char with no ASCII equivalent), the code generator output the last byte as a hex value. This caused an error because it was outputting the raw hex value, with the high bit already set, which the assembler did not expect. This change corrects the behavior for code generation and on-screen display, and adds a few samples to the regression test suite. (see issue #102) --- Asm65/StringOpFormatter.cs | 17 +++- SourceGen/AsmGen/AsmMerlin32.cs | 9 +- SourceGen/AsmGen/AsmTass64.cs | 15 +--- SourceGen/PseudoOp.cs | 45 +++------- SourceGen/SGTestData/20120-char-encoding-a | Bin 1299 -> 1417 bytes .../SGTestData/20120-char-encoding-a.dis65 | 82 +++++++++++++++++- SourceGen/SGTestData/20130-char-encoding-p | Bin 1299 -> 1417 bytes .../SGTestData/20130-char-encoding-p.dis65 | 82 +++++++++++++++++- SourceGen/SGTestData/20140-char-encoding-s | Bin 1299 -> 1417 bytes .../SGTestData/20140-char-encoding-s.dis65 | 82 +++++++++++++++++- .../Expected/20120-char-encoding-a_64tass.S | 30 ++++++- .../Expected/20120-char-encoding-a_acme.S | 26 +++++- .../Expected/20120-char-encoding-a_cc65.S | 26 +++++- .../Expected/20120-char-encoding-a_cc65.cfg | 2 +- .../Expected/20120-char-encoding-a_merlin32.S | 26 +++++- .../Expected/20130-char-encoding-p_64tass.S | 30 ++++++- .../Expected/20130-char-encoding-p_acme.S | 26 +++++- .../Expected/20130-char-encoding-p_cc65.S | 26 +++++- .../Expected/20130-char-encoding-p_cc65.cfg | 2 +- .../Expected/20130-char-encoding-p_merlin32.S | 26 +++++- .../Expected/20140-char-encoding-s_64tass.S | 30 ++++++- .../Expected/20140-char-encoding-s_acme.S | 26 +++++- .../Expected/20140-char-encoding-s_cc65.S | 26 +++++- .../Expected/20140-char-encoding-s_cc65.cfg | 2 +- .../Expected/20140-char-encoding-s_merlin32.S | 26 +++++- .../SGTestData/Source/20010-string-types.S | 4 +- .../SGTestData/Source/20120-char-encoding.S | 40 +++++++++ 27 files changed, 617 insertions(+), 89 deletions(-) diff --git a/Asm65/StringOpFormatter.cs b/Asm65/StringOpFormatter.cs index b6a2bd3..6be70f8 100644 --- a/Asm65/StringOpFormatter.cs +++ b/Asm65/StringOpFormatter.cs @@ -31,8 +31,17 @@ namespace Asm65 { /// public enum ReverseMode { Forward, LineReverse, FullReverse }; + /// + /// Character encoding conversion delegate. This function converts a raw byte value + /// to a printable value, or CharEncoding.UNPRINTABLE_CHAR. + /// public CharEncoding.Convert CharConv { get; set; } + /// + /// True if the input bytes are a DCI string. Only compatible with ReverseMode==Forward. + /// + public bool IsDciString { get; set; } = false; + // Output format for raw (non-printable) characters. Most assemblers use comma-separated // hex values, some allow dense hex strings. public enum RawOutputStyle { DenseHex, CommaSep }; @@ -252,6 +261,8 @@ namespace Asm65 { /// public void FeedBytes(byte[] data, int offset, int length, int leadingBytes, ReverseMode revMode) { + Debug.Assert(!IsDciString || revMode == ReverseMode.Forward); + int startOffset = offset; int strEndOffset = offset + length; @@ -288,7 +299,11 @@ namespace Asm65 { } else { Debug.Assert(revMode == ReverseMode.Forward); for (; offset < strEndOffset; offset++) { - WriteChar(data[offset]); + byte val = data[offset]; + if (IsDciString && offset == strEndOffset - 1) { + val ^= 0x80; + } + WriteChar(val); } } diff --git a/SourceGen/AsmGen/AsmMerlin32.cs b/SourceGen/AsmGen/AsmMerlin32.cs index 70393a2..b2f3546 100644 --- a/SourceGen/AsmGen/AsmMerlin32.cs +++ b/SourceGen/AsmGen/AsmMerlin32.cs @@ -626,12 +626,7 @@ namespace SourceGen.AsmGen { StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, new Formatter.DelimiterDef(delim), StringOpFormatter.RawOutputStyle.DenseHex, charConv, false); - if (dfd.FormatType == FormatDescriptor.Type.StringDci) { - // DCI is awkward because the character encoding flips on the last byte. Rather - // than clutter up StringOpFormatter for this rare item, we just accept low/high - // throughout. - stropf.CharConv = CharEncoding.ConvertLowAndHighAscii; - } + stropf.IsDciString = (dfd.FormatType == FormatDescriptor.Type.StringDci); // Feed bytes in, skipping over the leading length bytes. stropf.FeedBytes(data, offset + leadingBytes, @@ -672,7 +667,7 @@ namespace SourceGen.AsmGen { if (stropf.Lines.Count != 1) { // single-line only opcodeStr = sDataOpNames.StrGeneric; - stropf.CharConv = charConv; + stropf.IsDciString = false; redo = true; } break; diff --git a/SourceGen/AsmGen/AsmTass64.cs b/SourceGen/AsmGen/AsmTass64.cs index bf4921b..5fcb6ba 100644 --- a/SourceGen/AsmGen/AsmTass64.cs +++ b/SourceGen/AsmGen/AsmTass64.cs @@ -750,23 +750,18 @@ namespace SourceGen.AsmGen { Debug.Assert(dfd.Length > 0); CharEncoding.Convert charConv = null; - CharEncoding.Convert dciConv = null; switch (dfd.FormatSubType) { case FormatDescriptor.SubType.Ascii: charConv = CharEncoding.ConvertAscii; - dciConv = CharEncoding.ConvertLowAndHighAscii; break; case FormatDescriptor.SubType.HighAscii: charConv = CharEncoding.ConvertHighAscii; - dciConv = CharEncoding.ConvertLowAndHighAscii; break; case FormatDescriptor.SubType.C64Petscii: charConv = CharEncoding.ConvertC64Petscii; - dciConv = CharEncoding.ConvertLowAndHighC64Petscii; break; case FormatDescriptor.SubType.C64Screen: charConv = CharEncoding.ConvertC64ScreenCode; - dciConv = CharEncoding.ConvertLowAndHighC64ScreenCode; break; default: break; @@ -808,6 +803,7 @@ namespace SourceGen.AsmGen { if ((Project.FileData[offset + dfd.Length - 1] & 0x80) == 0) { // ".shift" directive only works for strings where the low bit starts // clear and ends high. + // TODO(maybe): this is sub-optimal for high-ASCII DCI strings. OutputNoJoy(offset, dfd.Length, labelStr, commentStr); return; } @@ -820,12 +816,7 @@ namespace SourceGen.AsmGen { StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, Formatter.DOUBLE_QUOTE_DELIM,StringOpFormatter.RawOutputStyle.CommaSep, charConv, false); - if (dfd.FormatType == FormatDescriptor.Type.StringDci) { - // DCI is awkward because the character encoding flips on the last byte. Rather - // than clutter up StringOpFormatter for this rare item, we just accept low/high - // throughout. - stropf.CharConv = dciConv; - } + stropf.IsDciString = (dfd.FormatType == FormatDescriptor.Type.StringDci); // Feed bytes in, skipping over hidden bytes (leading L8, trailing null). stropf.FeedBytes(data, offset + hiddenLeadingBytes, @@ -847,7 +838,7 @@ namespace SourceGen.AsmGen { if (stropf.Lines.Count != 1) { // Must be single-line. opcodeStr = sDataOpNames.StrGeneric; - stropf.CharConv = charConv; // undo DCI hack + stropf.IsDciString = false; redo = true; } break; diff --git a/SourceGen/PseudoOp.cs b/SourceGen/PseudoOp.cs index 8ba882a..13c4fbb 100644 --- a/SourceGen/PseudoOp.cs +++ b/SourceGen/PseudoOp.cs @@ -442,13 +442,11 @@ namespace SourceGen { /// Format descriptor. /// File data. /// Offset, within data, of start of string. - /// Pseudo-opcode string. + /// Receives the pseudo-opcode string. /// Array of operand strings. public static List FormatStringOp(Formatter formatter, PseudoOpNames opNames, FormatDescriptor dfd, byte[] data, int offset, out string popcode) { - int hiddenLeadingBytes = 0; - int trailingBytes = 0; StringOpFormatter.ReverseMode revMode = StringOpFormatter.ReverseMode.Forward; Formatter.DelimiterSet delSet = formatter.Config.mStringDelimiters; Formatter.DelimiterDef delDef; @@ -456,35 +454,19 @@ namespace SourceGen { CharEncoding.Convert charConv; switch (dfd.FormatSubType) { case FormatDescriptor.SubType.Ascii: - if (dfd.FormatType == FormatDescriptor.Type.StringDci) { - charConv = CharEncoding.ConvertLowAndHighAscii; - } else { - charConv = CharEncoding.ConvertAscii; - } + charConv = CharEncoding.ConvertAscii; delDef = delSet.Get(CharEncoding.Encoding.Ascii); break; case FormatDescriptor.SubType.HighAscii: - if (dfd.FormatType == FormatDescriptor.Type.StringDci) { - charConv = CharEncoding.ConvertLowAndHighAscii; - } else { - charConv = CharEncoding.ConvertHighAscii; - } + charConv = CharEncoding.ConvertHighAscii; delDef = delSet.Get(CharEncoding.Encoding.HighAscii); break; case FormatDescriptor.SubType.C64Petscii: - if (dfd.FormatType == FormatDescriptor.Type.StringDci) { - charConv = CharEncoding.ConvertLowAndHighC64Petscii; - } else { - charConv = CharEncoding.ConvertC64Petscii; - } + charConv = CharEncoding.ConvertC64Petscii; delDef = delSet.Get(CharEncoding.Encoding.C64Petscii); break; case FormatDescriptor.SubType.C64Screen: - if (dfd.FormatType == FormatDescriptor.Type.StringDci) { - charConv = CharEncoding.ConvertLowAndHighC64ScreenCode; - } else { - charConv = CharEncoding.ConvertC64ScreenCode; - } + charConv = CharEncoding.ConvertC64ScreenCode; delDef = delSet.Get(CharEncoding.Encoding.C64ScreenCode); break; default: @@ -498,6 +480,11 @@ namespace SourceGen { delDef = Formatter.DOUBLE_QUOTE_DELIM; } + StringOpFormatter stropf = new StringOpFormatter(formatter, delDef, + StringOpFormatter.RawOutputStyle.CommaSep, charConv, false); + + int hiddenLeadingBytes = 0; + int trailingBytes = 0; switch (dfd.FormatType) { case FormatDescriptor.Type.StringGeneric: // Generic character data. @@ -513,30 +500,22 @@ namespace SourceGen { // Character data with a terminating null. Don't show the null byte. popcode = opNames.StrNullTerm; trailingBytes = 1; - //if (strLen == 0) { - // showHexZeroes = 1; - //} break; case FormatDescriptor.Type.StringL8: // Character data with a leading length byte. Don't show the length. hiddenLeadingBytes = 1; - //if (strLen == 0) { - // showHexZeroes = 1; - //} popcode = opNames.StrLen8; break; case FormatDescriptor.Type.StringL16: // Character data with a leading length word. Don't show the length. Debug.Assert(dfd.Length > 1); hiddenLeadingBytes = 2; - //if (strLen == 0) { - // showHexZeroes = 2; - //} popcode = opNames.StrLen16; break; case FormatDescriptor.Type.StringDci: // High bit on last byte is flipped. popcode = opNames.StrDci; + stropf.IsDciString = true; break; default: Debug.Assert(false); @@ -544,8 +523,6 @@ namespace SourceGen { break; } - StringOpFormatter stropf = new StringOpFormatter(formatter, delDef, - StringOpFormatter.RawOutputStyle.CommaSep, charConv, false); stropf.FeedBytes(data, offset + hiddenLeadingBytes, dfd.Length - hiddenLeadingBytes - trailingBytes, 0, revMode); diff --git a/SourceGen/SGTestData/20120-char-encoding-a b/SourceGen/SGTestData/20120-char-encoding-a index eaac713643fa32011a3a6461605e81fabc1c0153..a0953cb6f4aa4b26cb95276c32278b35e4c09640 100644 GIT binary patch delta 134 zcmbQt)yci#4hv)Zh*V^EEC2 delta 16 XcmeC=p3Jr34hy5;h*V^EEC2 delta 16 XcmeC=p3Jr34hy5;h*V^EEC2 delta 16 XcmeC=p3Jr34hy5;