From 5889f4573705054d04c261689e0ebb4c9c6d551f Mon Sep 17 00:00:00 2001
From: Andy McFadden
Date: Tue, 13 Aug 2019 17:22:21 -0700
Subject: [PATCH] Replace on-screen string operand formatting

The previous functions just grabbed 62 characters and slapped quotes on the ends, but that doesn't work if we want to show strings with embedded control characters. This change replaces the simple formatter with the one used to generate assembly source code. This increases the cost of refreshing the display list, so a cache will need to be added in a future change. Converters for C64 PETSCII and C64 Screen Code have been defined. The results of changing the auto-scan encoding can now be viewed. The string operand formatter was using a single delimiter, but for the on-screen version we want open-quote and close-quote, and might want to identify some encodings with a prefix. The formatter now takes a class that defines the various parts. (It might be worth replacing the delimiter patterns recently added for single-character operands with this, so we don't have two mechanisms for very nearly the same thing.) While working on this change I remembered why there were two kinds of "reverse" in the old Merlin 32 string operand generator: what you want for assembly code is different from what you want on screen. The ReverseMode enum has been resurrected.
--- Asm65/CharEncoding.cs | 38 ++- Asm65/Formatter.cs | 28 ++ Asm65/StringOpFormatter.cs | 87 +++--- SourceGen/AsmGen/AsmAcme.cs | 6 +- SourceGen/AsmGen/AsmCc65.cs | 11 +- SourceGen/AsmGen/AsmMerlin32.cs | 13 +- SourceGen/AsmGen/AsmTass64.cs | 12 +- SourceGen/DataAnalysis.cs | 4 +- SourceGen/LineListGen.cs | 14 +- SourceGen/MainController.cs | 2 +- SourceGen/PseudoOp.cs | 250 ++++++------------ .../SGTestData/Source/2016-char-encoding.S | 18 +- 12 files changed, 245 insertions(+), 238 deletions(-) diff --git a/Asm65/CharEncoding.cs b/Asm65/CharEncoding.cs index 5a20953..511cae7 100644 --- a/Asm65/CharEncoding.cs +++ b/Asm65/CharEncoding.cs @@ -14,7 +14,7 @@ * limitations under the License. */ using System; -using System.Text; +using System.Diagnostics; namespace Asm65 { /// @@ -186,12 +186,24 @@ namespace Asm65 { map[0x07] = map[0x0a] = map[0x0d] = true; return map; } - public static bool IsPrintablePetscii(byte val) { + public static bool IsPrintableC64Petscii(byte val) { return sPrintablePetscii[val]; } - public static bool IsExtendedPetscii(byte val) { + public static bool IsExtendedC64Petscii(byte val) { return sExtendedPetscii[val]; } + public static char ConvertC64Petscii(byte val) { + if ((val >= 0x20 && val <= 0x40) || val == 0x5b || val == 0x5d) { + return (char)val; // number/symbols, '[', ']' + } else if (val >= 0x41 && val <= 0x5a) { + return (char)(val + 0x20); // lower case + } else if (val >= 0xc1 && val <= 0xda) { + return (char)(val - 0x80); // upper case + } else { + Debug.Assert(!IsPrintableC64Petscii(val)); + return UNPRINTABLE_CHAR; + } + } // // C64 Screen Codes @@ -200,7 +212,7 @@ namespace Asm65 { // // $00-1f: lower case letters (PETSCII $40-5f) // $20-3f: same as ASCII (PETSCII $20-3f) - // $40-5f: upper case letters (PETSCII $60-7f) + // $40-5f: upper case letters (PETSCII $60-7f / $c0-df) // $60-7f: non-ASCII symbols (PETSCII $a0-bf) // // With the high bit set, character colors are reversed. 
The printable ASCII set @@ -224,11 +236,25 @@ namespace Asm65 { } return map; } - public static bool IsPrintableScreenCode(byte val) { + public static bool IsPrintableC64ScreenCode(byte val) { return sPrintableScreenCode[val]; } - public static bool IsExtendedScreenCode(byte val) { + public static bool IsExtendedC64ScreenCode(byte val) { return sPrintableScreenCode[val]; } + public static char ConvertC64ScreenCode(byte val) { + if (val == 0x00 || val == 0x1b || val == 0x1d) { + return (char)(val + 0x40); // '@', '[', ']' + } else if (val >= 0x01 && val <= 0x1a) { + return (char)(val + 0x60); // lower case + } else if (val >= 0x20 && val <= 0x3f) { + return (char)(val); // numbers/symbols + } else if (val >= 0x41 && val <= 0x5a) { + return (char)(val); // upper case + } else { + Debug.Assert(!IsPrintableC64ScreenCode(val)); + return UNPRINTABLE_CHAR; + } + } } } diff --git a/Asm65/Formatter.cs b/Asm65/Formatter.cs index 6553c2b..3ea1e9d 100644 --- a/Asm65/Formatter.cs +++ b/Asm65/Formatter.cs @@ -104,6 +104,34 @@ namespace Asm65 { } } + /// + /// Container for string delimiter pieces. Instances are immutable. + /// + /// + /// The prefix is included at the start of the first line, but not included on + /// subsequent lines. This is primarily intended for the on-screen display, not + /// assembly source generation. The suffix is not used at all here; this class is + /// shared with the code that generates single-character operands. 
+ /// + public class DelimiterSet { + public string Prefix { get; private set; } + public char OpenDelim { get; private set; } + public char CloseDelim { get; private set; } + public string Suffix { get; private set; } + + public DelimiterSet(char delim) { + OpenDelim = CloseDelim = delim; + Prefix = Suffix = string.Empty; + } + public DelimiterSet(string prefix, char openDelim, char closeDelim, string suffix) { + Prefix = prefix; + OpenDelim = openDelim; + CloseDelim = closeDelim; + Suffix = suffix; + } + } + public static DelimiterSet DOUBLE_QUOTE_DELIM = new DelimiterSet('"'); + private static readonly char[] sHexCharsLower = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; diff --git a/Asm65/StringOpFormatter.cs b/Asm65/StringOpFormatter.cs index 6144fe7..eca272c 100644 --- a/Asm65/StringOpFormatter.cs +++ b/Asm65/StringOpFormatter.cs @@ -23,11 +23,15 @@ namespace Asm65 { /// delimiters and non-printable characters. /// public class StringOpFormatter { - public CharEncoding.Convert CharConv { get; set; } + /// + /// Text direction. If text is stored in reverse order, we want to un-reverse it to + /// make it readable. This gets tricky for a multi-line item. For the assembler we + /// want to break it into lines and then reverse each chunk, but on screen we want to + /// reverse the entire thing as a single block. + /// + public enum ReverseMode { Forward, LineReverse, FullReverse }; - private char Delimiter { get; set; } - private RawOutputStyle RawStyle { get; set; } - private int MaxOperandLen { get; set; } + public CharEncoding.Convert CharConv { get; set; } // Output format for raw (non-printable) characters. Most assemblers use comma-separated // hex values, some allow dense hex strings. 
@@ -37,6 +41,10 @@ namespace Asm65 { public bool HasEscapedText { get; private set; } public List Lines { get; private set; } + private Formatter.DelimiterSet mDelimiterSet; + private RawOutputStyle mRawStyle; + private int mMaxOperandLen; + // Reference to array with 16 hex digits. (May be upper or lower case.) private char[] mHexChars; @@ -69,21 +77,24 @@ namespace Asm65 { /// Constructor. /// /// Reference to text formatter. - /// String delimiter character. + /// String delimiter values. /// How to format raw byte data. /// Maximum line length. /// Character conversion delegate. - public StringOpFormatter(Formatter formatter, char delimiter, RawOutputStyle byteStyle, - int maxOperandLen, CharEncoding.Convert charConv) { - Delimiter = delimiter; - RawStyle = byteStyle; - MaxOperandLen = maxOperandLen; + public StringOpFormatter(Formatter formatter, Formatter.DelimiterSet delimiterSet, + RawOutputStyle byteStyle, int maxOperandLen, CharEncoding.Convert charConv) { + mRawStyle = byteStyle; + mMaxOperandLen = maxOperandLen; CharConv = charConv; - mBuffer = new char[MaxOperandLen]; + mDelimiterSet = delimiterSet; + mBuffer = new char[mMaxOperandLen]; mHexChars = formatter.HexDigits; Lines = new List(); + // suffix not used, so we don't expect it to be set to something + Debug.Assert(string.IsNullOrEmpty(mDelimiterSet.Suffix)); + Reset(); } @@ -91,6 +102,11 @@ namespace Asm65 { mState = State.StartOfLine; mIndex = 0; Lines.Clear(); + + // Copy the prefix string into the buffer for the first line. + for (int i = 0; i < mDelimiterSet.Prefix.Length; i++) { + mBuffer[mIndex++] = mDelimiterSet.Prefix[i]; + } } /// @@ -102,7 +118,8 @@ namespace Asm65 { Debug.Assert(mState != State.Finished); char ch = CharConv(rawCh); - if (ch == Delimiter || ch == CharEncoding.UNPRINTABLE_CHAR) { + if (ch == mDelimiterSet.OpenDelim || ch == mDelimiterSet.CloseDelim || + ch == CharEncoding.UNPRINTABLE_CHAR) { // Must write it as a byte. 
WriteByte(rawCh); return; @@ -115,21 +132,21 @@ namespace Asm65 { // We must have 4 chars remaining (comma, open quote, new char, close quote). switch (mState) { case State.StartOfLine: - mBuffer[mIndex++] = Delimiter; + mBuffer[mIndex++] = mDelimiterSet.OpenDelim; break; case State.InQuote: - if (mIndex + 2 > MaxOperandLen) { + if (mIndex + 2 > mMaxOperandLen) { Flush(); - mBuffer[mIndex++] = Delimiter; + mBuffer[mIndex++] = mDelimiterSet.OpenDelim; } break; case State.OutQuote: - if (mIndex + 4 > MaxOperandLen) { + if (mIndex + 4 > mMaxOperandLen) { Flush(); - mBuffer[mIndex++] = Delimiter; + mBuffer[mIndex++] = mDelimiterSet.OpenDelim; } else { mBuffer[mIndex++] = ','; - mBuffer[mIndex++] = Delimiter; + mBuffer[mIndex++] = mDelimiterSet.OpenDelim; } break; default: @@ -158,20 +175,20 @@ namespace Asm65 { case State.StartOfLine: break; case State.InQuote: - int minWidth = (RawStyle == RawOutputStyle.CommaSep) ? 5 : 4; - if (mIndex + minWidth > MaxOperandLen) { + int minWidth = (mRawStyle == RawOutputStyle.CommaSep) ? 5 : 4; + if (mIndex + minWidth > mMaxOperandLen) { Flush(); } else { - mBuffer[mIndex++] = Delimiter; + mBuffer[mIndex++] = mDelimiterSet.CloseDelim; mBuffer[mIndex++] = ','; } break; case State.OutQuote: - minWidth = (RawStyle == RawOutputStyle.CommaSep) ? 4 : 2; - if (mIndex + minWidth > MaxOperandLen) { + minWidth = (mRawStyle == RawOutputStyle.CommaSep) ? 
4 : 2; + if (mIndex + minWidth > mMaxOperandLen) { Flush(); } else { - if (RawStyle == RawOutputStyle.CommaSep) { + if (mRawStyle == RawOutputStyle.CommaSep) { mBuffer[mIndex++] = ','; } } @@ -181,7 +198,7 @@ namespace Asm65 { break; } - if (RawStyle == RawOutputStyle.CommaSep) { + if (mRawStyle == RawOutputStyle.CommaSep) { mBuffer[mIndex++] = '$'; } mBuffer[mIndex++] = mHexChars[val >> 4]; @@ -203,12 +220,12 @@ namespace Asm65 { switch (mState) { case State.StartOfLine: // empty string; put out a pair of delimiters - mBuffer[mIndex++] = Delimiter; - mBuffer[mIndex++] = Delimiter; + mBuffer[mIndex++] = mDelimiterSet.OpenDelim; + mBuffer[mIndex++] = mDelimiterSet.CloseDelim; break; case State.InQuote: // add delimiter and finish - mBuffer[mIndex++] = Delimiter; + mBuffer[mIndex++] = mDelimiterSet.CloseDelim; break; case State.OutQuote: // just output it @@ -216,7 +233,7 @@ namespace Asm65 { } string newStr = new string(mBuffer, 0, mIndex); - Debug.Assert(newStr.Length <= MaxOperandLen); + Debug.Assert(newStr.Length <= mMaxOperandLen); Lines.Add(newStr); mState = State.Finished; @@ -228,7 +245,7 @@ namespace Asm65 { /// Feeds the bytes into the StringGather. /// public void FeedBytes(byte[] data, int offset, int length, int leadingBytes, - bool reverse) { + ReverseMode revMode) { int startOffset = offset; int strEndOffset = offset + length; @@ -238,13 +255,13 @@ namespace Asm65 { while (leadingBytes-- > 0) { WriteByte(data[offset++]); } - if (reverse) { + if (revMode == ReverseMode.LineReverse) { // Max per line is line length minus the two delimiters. We don't allow // any hex quoting in reversed text, so this always works. (If somebody // does try to reverse text with delimiters or unprintable chars, we'll // blow out the line limit, but for a cross-assembler that should be purely // cosmetic.) 
- int maxPerLine = MaxOperandLen - 2; + int maxPerLine = mMaxOperandLen - 2; int numBlockLines = (length + maxPerLine - 1) / maxPerLine; for (int chunk = 0; chunk < numBlockLines; chunk++) { @@ -257,7 +274,13 @@ namespace Asm65 { WriteChar(data[off]); } } + } else if (revMode == ReverseMode.FullReverse) { + for (; offset < strEndOffset; offset++) { + int posn = startOffset + (strEndOffset - offset) - 1; + WriteChar(data[posn]); + } } else { + Debug.Assert(revMode == ReverseMode.Forward); for (; offset < strEndOffset; offset++) { WriteChar(data[offset]); } diff --git a/SourceGen/AsmGen/AsmAcme.cs b/SourceGen/AsmGen/AsmAcme.cs index 66c39b9..ac98eed 100644 --- a/SourceGen/AsmGen/AsmAcme.cs +++ b/SourceGen/AsmGen/AsmAcme.cs @@ -565,10 +565,12 @@ namespace SourceGen.AsmGen { return; } - StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, '"', + StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, + Formatter.DOUBLE_QUOTE_DELIM, StringOpFormatter.RawOutputStyle.CommaSep, MAX_OPERAND_LEN, CharEncoding.ConvertAscii); - stropf.FeedBytes(data, offset, dfd.Length, leadingBytes, false); + stropf.FeedBytes(data, offset, dfd.Length, leadingBytes, + StringOpFormatter.ReverseMode.Forward); string opcodeStr = formatter.FormatPseudoOp(sDataOpNames.StrGeneric); foreach (string str in stropf.Lines) { diff --git a/SourceGen/AsmGen/AsmCc65.cs b/SourceGen/AsmGen/AsmCc65.cs index 8be90e6..7c1fec2 100644 --- a/SourceGen/AsmGen/AsmCc65.cs +++ b/SourceGen/AsmGen/AsmCc65.cs @@ -631,9 +631,11 @@ namespace SourceGen.AsmGen { charConv = CharEncoding.ConvertAscii; } - StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, '"', - StringOpFormatter.RawOutputStyle.CommaSep, MAX_OPERAND_LEN, charConv); - stropf.FeedBytes(data, offset, dfd.Length - trailingBytes, leadingBytes, false); + StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, + Formatter.DOUBLE_QUOTE_DELIM, StringOpFormatter.RawOutputStyle.CommaSep, + MAX_OPERAND_LEN, charConv); + 
stropf.FeedBytes(data, offset, dfd.Length - trailingBytes, leadingBytes, + StringOpFormatter.ReverseMode.Forward); string opcodeStr = formatter.FormatPseudoOp(sDataOpNames.StrGeneric); @@ -665,7 +667,8 @@ namespace SourceGen.AsmGen { } else { // Didn't fit, so re-emit it, this time with the terminating null byte. stropf.Reset(); - stropf.FeedBytes(data, offset, dfd.Length, leadingBytes, false); + stropf.FeedBytes(data, offset, dfd.Length, leadingBytes, + StringOpFormatter.ReverseMode.Forward); } } diff --git a/SourceGen/AsmGen/AsmMerlin32.cs b/SourceGen/AsmGen/AsmMerlin32.cs index 78ad892..90f0650 100644 --- a/SourceGen/AsmGen/AsmMerlin32.cs +++ b/SourceGen/AsmGen/AsmMerlin32.cs @@ -452,7 +452,7 @@ namespace SourceGen.AsmGen { Debug.Assert(dfd.IsString); Debug.Assert(dfd.Length > 0); - bool reverse = false; + StringOpFormatter.ReverseMode revMode = StringOpFormatter.ReverseMode.Forward; int leadingBytes = 0; string opcodeStr; @@ -462,7 +462,7 @@ namespace SourceGen.AsmGen { break; case FormatDescriptor.Type.StringReverse: opcodeStr = sDataOpNames.StrReverse; - reverse = true; + revMode = StringOpFormatter.ReverseMode.LineReverse; break; case FormatDescriptor.Type.StringNullTerm: opcodeStr = sDataOpNames.StrGeneric; // no pseudo-op for this @@ -500,7 +500,8 @@ namespace SourceGen.AsmGen { delim = '\''; } - StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, delim, + StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, + new Formatter.DelimiterSet(delim), StringOpFormatter.RawOutputStyle.DenseHex, MAX_OPERAND_LEN, charConv); if (dfd.FormatType == FormatDescriptor.Type.StringDci) { // DCI is awkward because the character encoding flips on the last byte. Rather @@ -511,7 +512,7 @@ namespace SourceGen.AsmGen { // Feed bytes in, skipping over the leading length bytes. 
stropf.FeedBytes(data, offset + leadingBytes, - dfd.Length - leadingBytes, 0, reverse); + dfd.Length - leadingBytes, 0, revMode); Debug.Assert(stropf.Lines.Count > 0); // See if we need to do this over. @@ -524,7 +525,7 @@ namespace SourceGen.AsmGen { if (stropf.HasEscapedText) { // can't include escaped characters in REV opcodeStr = sDataOpNames.StrGeneric; - reverse = false; + revMode = StringOpFormatter.ReverseMode.Forward; redo = true; } break; @@ -563,7 +564,7 @@ namespace SourceGen.AsmGen { // This time, instead of skipping over leading length bytes, we include them // explicitly. stropf.Reset(); - stropf.FeedBytes(data, offset, dfd.Length, leadingBytes, reverse); + stropf.FeedBytes(data, offset, dfd.Length, leadingBytes, revMode); } opcodeStr = formatter.FormatPseudoOp(opcodeStr); diff --git a/SourceGen/AsmGen/AsmTass64.cs b/SourceGen/AsmGen/AsmTass64.cs index d2cca5f..bb8dec5 100644 --- a/SourceGen/AsmGen/AsmTass64.cs +++ b/SourceGen/AsmGen/AsmTass64.cs @@ -568,9 +568,9 @@ namespace SourceGen.AsmGen { return; } - StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, '"', - StringOpFormatter.RawOutputStyle.CommaSep, MAX_OPERAND_LEN, - CharEncoding.ConvertAscii); + StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, + Formatter.DOUBLE_QUOTE_DELIM,StringOpFormatter.RawOutputStyle.CommaSep, + MAX_OPERAND_LEN, CharEncoding.ConvertAscii); if (dfd.FormatType == FormatDescriptor.Type.StringDci) { // DCI is awkward because the character encoding flips on the last byte. Rather // than clutter up StringOpFormatter for this rare item, we just accept low/high @@ -580,7 +580,8 @@ namespace SourceGen.AsmGen { // Feed bytes in, skipping over hidden bytes (leading L8, trailing null). 
stropf.FeedBytes(data, offset + hiddenLeadingBytes, - dfd.Length - hiddenLeadingBytes - trailingBytes, shownLeadingBytes, false); + dfd.Length - hiddenLeadingBytes - trailingBytes, shownLeadingBytes, + StringOpFormatter.ReverseMode.Forward); Debug.Assert(stropf.Lines.Count > 0); // See if we need to do this over. @@ -612,7 +613,8 @@ namespace SourceGen.AsmGen { // This time, instead of skipping over leading length bytes, we include them // explicitly. stropf.Reset(); - stropf.FeedBytes(data, offset, dfd.Length, hiddenLeadingBytes, false); + stropf.FeedBytes(data, offset, dfd.Length, hiddenLeadingBytes, + StringOpFormatter.ReverseMode.Forward); } opcodeStr = formatter.FormatPseudoOp(opcodeStr); diff --git a/SourceGen/DataAnalysis.cs b/SourceGen/DataAnalysis.cs index 06477ba..d2ff844 100644 --- a/SourceGen/DataAnalysis.cs +++ b/SourceGen/DataAnalysis.cs @@ -724,11 +724,11 @@ namespace SourceGen { baseSubType = FormatDescriptor.SubType.ASCII_GENERIC; break; case TextScanMode.C64Petscii: - testPrintable = CharEncoding.IsExtendedPetscii; + testPrintable = CharEncoding.IsExtendedC64Petscii; baseSubType = FormatDescriptor.SubType.C64Petscii; break; case TextScanMode.C64ScreenCode: - testPrintable = CharEncoding.IsExtendedScreenCode; + testPrintable = CharEncoding.IsExtendedC64ScreenCode; baseSubType = FormatDescriptor.SubType.C64Screen; break; default: diff --git a/SourceGen/LineListGen.cs b/SourceGen/LineListGen.cs index 5ef0084..f55e33c 100644 --- a/SourceGen/LineListGen.cs +++ b/SourceGen/LineListGen.cs @@ -620,7 +620,7 @@ namespace SourceGen { mLineList.InsertRange(0, headerLines); GenerateLineList(mProject, mFormatter, mPseudoOpNames, - 0, mProject.FileData.Length - 1, mLineList); + mProject.FileData, 0, mProject.FileData.Length - 1, mLineList); mDisplayList.ResetList(mLineList.Count); @@ -704,7 +704,8 @@ namespace SourceGen { // Create temporary list to hold new lines. Set the initial capacity to // the previous size, on the assumption that it won't change much. 
List newLines = new List(endIndex - startIndex + 1); - GenerateLineList(mProject, mFormatter, mPseudoOpNames, startOffset, endOffset, newLines); + GenerateLineList(mProject, mFormatter, mPseudoOpNames, mProject.FileData, + startOffset, endOffset, newLines); // Out with the old, in with the new. mLineList.RemoveRange(startIndex, endIndex - startIndex + 1); @@ -862,7 +863,8 @@ namespace SourceGen { /// Offset of last byte. /// List to add output lines to. private static void GenerateLineList(DisasmProject proj, Formatter formatter, - PseudoOp.PseudoOpNames opNames, int startOffset, int endOffset, List lines) { + PseudoOp.PseudoOpNames opNames, byte[] data, int startOffset, int endOffset, + List lines) { //Debug.WriteLine("GenerateRange [+" + startOffset.ToString("x6") + ",+" + // endOffset.ToString("x6") + "]"); @@ -996,8 +998,12 @@ namespace SourceGen { offset += len; } else { Debug.Assert(attr.DataDescriptor != null); + // TODO: replace this with something that caches expensive items like + // string operands; maybe have an out List that is null for the + // easy stuff int numLines = - PseudoOp.ComputeRequiredLineCount(formatter, attr.DataDescriptor); + PseudoOp.ComputeRequiredLineCount(formatter, opNames, attr.DataDescriptor, + data, offset); for (int i = 0; i < numLines; i++) { Line line = new Line(offset, attr.Length, Line.Type.Data, i); lines.Add(line); diff --git a/SourceGen/MainController.cs b/SourceGen/MainController.cs index 4a803e0..9290efb 100644 --- a/SourceGen/MainController.cs +++ b/SourceGen/MainController.cs @@ -444,7 +444,7 @@ namespace SourceGen { // Set pseudo-op names. Entries aren't allowed to be blank, so we start with the // default values and merge in whatever the user has configured. 
- mPseudoOpNames = PseudoOp.sDefaultPseudoOpNames.GetCopy(); + mPseudoOpNames = PseudoOp.DefaultPseudoOpNames; string pseudoCereal = settings.GetString(AppSettings.FMT_PSEUDO_OP_NAMES, null); if (!string.IsNullOrEmpty(pseudoCereal)) { PseudoOp.PseudoOpNames deser = PseudoOp.PseudoOpNames.Deserialize(pseudoCereal); diff --git a/SourceGen/PseudoOp.cs b/SourceGen/PseudoOp.cs index 73af576..d94f0dd 100644 --- a/SourceGen/PseudoOp.cs +++ b/SourceGen/PseudoOp.cs @@ -142,10 +142,13 @@ namespace SourceGen { } /// - /// Some reasonable defaults for on-screen display. The object is mutable, so make - /// a copy of it. + /// Returns a new PseudoOpNames instance with some reasonable defaults for on-screen + /// display. /// - public static readonly PseudoOpNames sDefaultPseudoOpNames = new PseudoOpNames() { + public static PseudoOpNames DefaultPseudoOpNames { + get { return sDefaultPseudoOpNames.GetCopy(); } + } + private static readonly PseudoOpNames sDefaultPseudoOpNames = new PseudoOpNames() { EquDirective = ".eq", OrgDirective = ".org", RegWidthDirective = ".rwid", @@ -166,7 +169,7 @@ namespace SourceGen { StrLen16 = ".l2str", StrNullTerm = ".zstr", StrDci = ".dstr", - }; + }; /// @@ -175,36 +178,12 @@ namespace SourceGen { /// Format definition. /// Data format descriptor. /// Line count. - public static int ComputeRequiredLineCount(Formatter formatter, FormatDescriptor dfd) { + public static int ComputeRequiredLineCount(Formatter formatter, PseudoOpNames opNames, + FormatDescriptor dfd, byte[] data, int offset) { if (dfd.IsString) { - // Subtract two chars, to leave room for start/end delimiter. We use - // non-ASCII delimiters on-screen, so there's nothing to escape there. - int maxLen = MAX_OPERAND_LEN - 2; - - // Remove leading length or trailing null byte from string length. 
- int textLen = dfd.Length; - switch (dfd.FormatType) { - case FormatDescriptor.Type.StringGeneric: - case FormatDescriptor.Type.StringReverse: - case FormatDescriptor.Type.StringDci: - break; - case FormatDescriptor.Type.StringNullTerm: - case FormatDescriptor.Type.StringL8: - textLen--; - break; - case FormatDescriptor.Type.StringL16: - textLen -= 2; - break; - default: - Debug.Assert(false); - break; - } - int strLen = (textLen + maxLen - 1) / maxLen; - if (strLen == 0) { - // Empty string, but we still need to output a line. - strLen = 1; - } - return strLen; + List lines = FormatStringOp(formatter, opNames, dfd, data, + offset, out string popcode); + return lines.Count; } switch (dfd.FormatType) { @@ -259,45 +238,10 @@ namespace SourceGen { PseudoOut po = new PseudoOut(); if (dfd.IsString) { - // It's hard to do strings in single-line pieces because of prefix lengths, - // terminating nulls, DCI polarity, and reverse-order strings. We - // really just want to convert the whole thing to a run of chars - // and then pull out a chunk. As an optimization we can handle - // generic strings more efficiently, which should help if auto-analysis is - // creating massive strings (at least until auto-analysis learns how to do - // more complex things). 
- // - // TODO: consider storing the full string on the first line, then each - // subsequent line has a reference to it with offset+length - if (dfd.FormatType == FormatDescriptor.Type.StringGeneric) { - int maxPerLine = MAX_OPERAND_LEN - 2; - offset += subIndex * maxPerLine; - length -= subIndex * maxPerLine; - if (length > maxPerLine) { - length = maxPerLine; - } - char[] ltext = BytesToChars(formatter, opNames, dfd.FormatType, data, - offset, length, out string lpopcode, out int unused); - po.Opcode = lpopcode; - po.Operand = "\u201c" + new string(ltext) + "\u201d"; - } else { - char[] text = BytesToChars(formatter, opNames, dfd.FormatType, data, - offset, length, out string popcode, out int showHexZeroes); - - if (showHexZeroes == 1) { - po.Opcode = opNames.DefineData1; - po.Operand = formatter.FormatHexValue(0, 2); - } else if (showHexZeroes == 2) { - po.Opcode = opNames.DefineData2; - po.Operand = formatter.FormatHexValue(0, 4); - } else { - Debug.Assert(showHexZeroes == 0); - po.Opcode = popcode; - List outList = new List(); - GenerateTextLines(text, "\u201c", "\u201d", po, outList); - po = outList[subIndex]; - } - } + List lines = FormatStringOp(formatter, opNames, dfd, data, + offset, out string popcode); + po.Opcode = popcode; + po.Operand = lines[subIndex]; } else { switch (dfd.FormatType) { case FormatDescriptor.Type.Default: @@ -352,135 +296,99 @@ namespace SourceGen { } /// - /// Converts a collection of bytes that represent a string into an array of characters, - /// stripping the high bit. Framing data, such as leading lengths and trailing nulls, - /// are not shown. + /// Converts a collection of bytes that represent a string into an array of formatted + /// string operands. /// /// Formatter object. - /// String layout. + /// Pseudo-opcode name table. + /// Format descriptor. /// File data. /// Offset, within data, of start of string. - /// Number of bytes to convert. /// Pseudo-opcode string. 
- /// If nonzero, show 1+ zeroes (representing a leading - /// length or null-termination) instead of an empty string. - /// Array of characters with string data. - private static char[] BytesToChars(Formatter formatter, PseudoOpNames opNames, - FormatDescriptor.Type formatType, byte[] data, int offset, int length, - out string popcode, out int showHexZeroes) { - Debug.Assert(length > 0); + /// Array of strings. + private static List FormatStringOp(Formatter formatter, PseudoOpNames opNames, + FormatDescriptor dfd, byte[] data, int offset, out string popcode) { - // See also GenMerlin32.OutputString(). - int strOffset = offset; - int strLen = length; - bool reverse = false; + int hiddenLeadingBytes = 0; + int trailingBytes = 0; + StringOpFormatter.ReverseMode revMode = StringOpFormatter.ReverseMode.Forward; - showHexZeroes = 0; - - switch (formatType) { - case FormatDescriptor.Type.StringGeneric: - // High or low ASCII, full width specified by formatter. - popcode = opNames.StrGeneric; + CharEncoding.Convert charConv; + switch (dfd.FormatSubType) { + case FormatDescriptor.SubType.Ascii: + charConv = CharEncoding.ConvertAscii; break; - case FormatDescriptor.Type.StringDci: - // High or low ASCII, full width specified by formatter. - popcode = opNames.StrDci; + case FormatDescriptor.SubType.HighAscii: + charConv = CharEncoding.ConvertHighAscii; + break; + case FormatDescriptor.SubType.C64Petscii: + charConv = CharEncoding.ConvertC64Petscii; + break; + case FormatDescriptor.SubType.C64Screen: + charConv = CharEncoding.ConvertC64ScreenCode; + break; + default: + Debug.Assert(false); + charConv = CharEncoding.ConvertAscii; + break; + } + + switch (dfd.FormatType) { + case FormatDescriptor.Type.StringGeneric: + // Generic character data. + popcode = opNames.StrGeneric; break; case FormatDescriptor.Type.StringReverse: // High or low ASCII, full width specified by formatter. Show characters // in reverse order. 
popcode = opNames.StrReverse; - reverse = true; + revMode = StringOpFormatter.ReverseMode.FullReverse; break; case FormatDescriptor.Type.StringNullTerm: - // High or low ASCII, with a terminating null. Don't show the null. If - // it's an empty string, just show the null byte as hex. + // Character data with a terminating null. Don't show the null byte. popcode = opNames.StrNullTerm; - strLen--; - if (strLen == 0) { - showHexZeroes = 1; - } + trailingBytes = 1; + //if (strLen == 0) { + // showHexZeroes = 1; + //} break; case FormatDescriptor.Type.StringL8: - // High or low ASCII, with a leading length byte. Don't show the null. - // If it's an empty string, just show the length byte as hex. - strOffset++; - strLen--; - if (strLen == 0) { - showHexZeroes = 1; - } + // Character data with a leading length byte. Don't show the length. + hiddenLeadingBytes = 1; + //if (strLen == 0) { + // showHexZeroes = 1; + //} popcode = opNames.StrLen8; break; case FormatDescriptor.Type.StringL16: - // High or low ASCII, with a leading length word. Don't show the null. - // If it's an empty string, just show the length word as hex. - Debug.Assert(strLen > 1); - strOffset += 2; - strLen -= 2; - if (strLen == 0) { - showHexZeroes = 2; - } + // Character data with a leading length word. Don't show the length. + Debug.Assert(dfd.Length > 1); + hiddenLeadingBytes = 2; + //if (strLen == 0) { + // showHexZeroes = 2; + //} popcode = opNames.StrLen16; break; + case FormatDescriptor.Type.StringDci: + // High or low ASCII, with high bit on last byte flipped. Only useful + // for ASCII strings. 
+ popcode = opNames.StrDci; + charConv = CharEncoding.ConvertLowAndHighAscii; + break; default: Debug.Assert(false); popcode = ".!!!"; break; } - // TODO(petscii): convert character encoding - char[] text = new char[strLen]; - if (!reverse) { - for (int i = 0; i < strLen; i++) { - text[i] = (char)(data[i + strOffset] & 0x7f); - } - } else { - for (int i = 0; i < strLen; i++) { - text[i] = (char)(data[strOffset + (strLen - i - 1)] & 0x7f); - } - } + Formatter.DelimiterSet delims = new Formatter.DelimiterSet( + "pfx:", '\u201c', '\u201d', string.Empty); + StringOpFormatter stropf = new StringOpFormatter(formatter, delims, + StringOpFormatter.RawOutputStyle.CommaSep, MAX_OPERAND_LEN, charConv); + stropf.FeedBytes(data, offset + hiddenLeadingBytes, + dfd.Length - hiddenLeadingBytes - trailingBytes, 0, revMode); - return text; - } - - /// - /// Generate multiple operand lines from a text line, adding optional delimiters. - /// - /// Buffer of characters to output. Must be ASCII. - /// Delimiter character(s), or the empty string. - /// Delimiter character(s), or the empty string. - /// PseudoOut with offset, length, and opcode set. Each - /// returned PseudoOut will have these value plus the generated operand. - /// List that receives the generated items. - private static void GenerateTextLines(char[] text, string startDelim, string endDelim, - PseudoOut template, List outList) { - // Could get fancy and break long strings at word boundaries. - int textOffset = 0; - - if (text.Length == 0) { - // empty string - PseudoOut po = new PseudoOut(template); - po.Operand = startDelim + endDelim; - outList.Add(po); - return; - } - - int textPerLine = MAX_OPERAND_LEN - (startDelim.Length + endDelim.Length); - StringBuilder sb = new StringBuilder(MAX_OPERAND_LEN); - while (textOffset < text.Length) { - int len = (text.Length - textOffset < textPerLine) ? 
- text.Length - textOffset : textPerLine; - sb.Clear(); - sb.Append(startDelim); - sb.Append(new string(text, textOffset, len)); - sb.Append(endDelim); - - PseudoOut po = new PseudoOut(template); - po.Operand = sb.ToString(); - outList.Add(po); - - textOffset += len; - } + return stropf.Lines; } /// diff --git a/SourceGen/SGTestData/Source/2016-char-encoding.S b/SourceGen/SGTestData/Source/2016-char-encoding.S index 7635f1c..6f7dd37 100644 --- a/SourceGen/SGTestData/Source/2016-char-encoding.S +++ b/SourceGen/SGTestData/Source/2016-char-encoding.S @@ -74,10 +74,10 @@ ; Get a bit fancy !byte $82 - !text "Low ASCII CRLF",$07,$0d,$0a + !text $07,"Low ASCII CRLF",$0d,$0a !byte $82 !xor $80 { - !text "High ASCII CRLF",$07,$0d,$0a + !text $07,"High ASCII CRLF",$0d,$0a } !byte $82 !pet $93,"PETSCII with ",$96,"control",$05," codes",$0d @@ -104,20 +104,28 @@ !scr "`abcdefghijklmnopqrstuvwxyz{|}~" ; The 2005 test exercises low/high ASCII strings, so no need to do that here. -; Do a quick test with PETSCII. +; Do a quick test with C64 characters. Note Screen Code can't be null-terminated, +; and PETSCII can't be DCI. !byte $84 !pet "IICSTEP esrever" ;format as StringReverse + !byte $84 !pet "null term PETSCII",0 ;format as StringNullTerm + !byte $84 !pet "This null-terminated string is too long to fit on a single line, and will be split.",0 + !byte $84 !pet 19,"PETSCII with length" ;format as StringL8 + !byte $84 !pet 20,0,"PETSCII with length2" ;format as StringL16 !byte $84 !scr "edoC neercS esrever" ;format as StringReverse - !scr "null term Screen Code",0 ;format as StringNullTerm - !scr "This null-terminated string is too long to fit on a single line, and will be split.",0 + !byte $84 !scr 23,"Screen Code with length" ;format as StringL8 + !byte $84 !scr 24,0,"Screen Code with length2" ;format as StringL16 + !byte $84 + !scr "Screen Code DC",$c9 ;format as DCI + !byte $84 !byte $85