diff --git a/Asm65/CharEncoding.cs b/Asm65/CharEncoding.cs
new file mode 100644
index 0000000..7f7b6a5
--- /dev/null
+++ b/Asm65/CharEncoding.cs
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2019 faddenSoft
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Asm65 {
+    /// <summary>Helpers for turning raw byte values into printable characters.</summary>
+    public static class CharEncoding {
+        // Sentinel returned by the converters when a value has no printable form.
+        public const char UNPRINTABLE_CHAR = '\ufffd';
+
+        /// <summary>Reports whether a byte value is a member of the character set.</summary>
+        public delegate bool InclusionTest(byte val);
+
+        /// <summary>
+        /// Maps a byte to its printable character, or to UNPRINTABLE_CHAR when the
+        /// value has no printable form.
+        /// </summary>
+        /// <remarks>
+        /// A single char is returned, so every mapping is assumed to fit in the Unicode
+        /// BMP.  That should be a safe assumption for 8-bit computer character sets.
+        /// </remarks>
+        public delegate char Convert(byte val);
+
+        //
+        // Standard ASCII.
+        //
+
+        // True for 0x20 (space) through 0x7e (tilde).
+        public static bool IsPrintableLowAscii(byte val) {
+            return 0x20 <= val && val <= 0x7e;
+        }
+
+        // True for printable low ASCII plus line feed (0x0a) and carriage return (0x0d).
+        public static bool IsExtendedLowAscii(byte val) {
+            return val == 0x0a || val == 0x0d || IsPrintableLowAscii(val);
+        }
+
+        // Returns the value as a char if it is printable low ASCII, else UNPRINTABLE_CHAR.
+        public static char ConvertLowAscii(byte val) {
+            return IsPrintableLowAscii(val) ? (char)val : UNPRINTABLE_CHAR;
+        }
+
+        //
+        // Standard ASCII, but with the high bit set.
+        //
+
+        // True for 0xa0 through 0xfe, i.e. the printable low ASCII range with bit 7 set.
+        public static bool IsPrintableHighAscii(byte val) {
+            return 0xa0 <= val && val <= 0xfe;
+        }
+
+        // True for printable high ASCII plus 0x8a and 0x8d (LF and CR with bit 7 set).
+        public static bool IsExtendedHighAscii(byte val) {
+            return val == 0x8a || val == 0x8d || IsPrintableHighAscii(val);
+        }
+
+        // Strips bit 7 from printable high ASCII; anything else is UNPRINTABLE_CHAR.
+        public static char ConvertHighAscii(byte val) {
+            return IsPrintableHighAscii(val) ? (char)(val & 0x7f) : UNPRINTABLE_CHAR;
+        }
+
+        //
+        // High *or* low ASCII.
+        //
+
+        // Accepts either encoding.  Clearing bit 7 folds both printable ranges onto
+        // 0x20-0x7e, so one low-ASCII test on the masked value covers both.
+        public static char ConvertLowAndHighAscii(byte val) {
+            byte folded = (byte)(val & 0x7f);
+            return IsPrintableLowAscii(folded) ? (char)folded : UNPRINTABLE_CHAR;
+        }
+    }
+}
diff --git a/SourceGen/AsmGen/StringGather.cs b/Asm65/StringOpFormatter.cs
similarity index 55%
rename from SourceGen/AsmGen/StringGather.cs
rename to Asm65/StringOpFormatter.cs
index 5be7085..6144fe7 100644
--- a/SourceGen/AsmGen/StringGather.cs
+++ b/Asm65/StringOpFormatter.cs
@@ -14,35 +14,30 @@
* limitations under the License.
*/
using System;
+using System.Collections.Generic;
using System.Diagnostics;
-namespace SourceGen.AsmGen {
+namespace Asm65 {
///
- /// Multi-line string gatherer. Accumulates characters and raw bytes, emitting
- /// them when we have a full operand's worth.
- ///
- /// If the delimiter character appears, it will be output inline as a raw byte.
- /// The low-ASCII string ['hello'world'] will become [27,'hello',27,'world',27]
- /// (or something similar).
+ /// String pseudo-op formatter. Handles character encoding conversion and quoting of
+ /// delimiters and non-printable characters.
///
- public class StringGather {
- // Inputs.
- public IGenerator Gen { get; private set; }
- public string Label { get; private set; }
- public string Opcode { get; private set; }
- public string Comment { get; private set; }
- public char Delimiter { get; private set; }
- public char DelimiterReplacement { get; private set; }
- public ByteStyle ByteStyleX { get; private set; }
- public int MaxOperandLen { get; private set; }
- public bool IsTestRun { get; private set; }
+ public class StringOpFormatter {
+ public CharEncoding.Convert CharConv { get; set; }
- public enum ByteStyle { DenseHex, CommaSep };
+ private char Delimiter { get; set; }
+ private RawOutputStyle RawStyle { get; set; }
+ private int MaxOperandLen { get; set; }
+
+ // Output format for raw (non-printable) characters. Most assemblers use comma-separated
+ // hex values, some allow dense hex strings.
+ public enum RawOutputStyle { DenseHex, CommaSep };
// Outputs.
- public bool HasDelimiter { get; private set; }
- public int NumLinesOutput { get; private set; }
+ public bool HasEscapedText { get; private set; }
+        public List<string> Lines { get; private set; }    // one formatted operand string per entry
+ // Reference to array with 16 hex digits. (May be upper or lower case.)
private char[] mHexChars;
///
@@ -50,12 +45,13 @@ namespace SourceGen.AsmGen {
/// because they're mixed with bytes, particularly when we have to escape the
/// delimiter character. Strings might start or end with escaped delimiters,
/// so we don't add them until we have to.
+        /// </summary>
private char[] mBuffer;
///
/// Next available character position.
///
- private int mIndex = 0;
+ private int mIndex;
///
/// State of the buffer, based on the last thing we added.
@@ -64,48 +60,51 @@ namespace SourceGen.AsmGen {
Unknown = 0,
StartOfLine,
InQuote,
- OutQuote
+ OutQuote,
+ Finished
}
- private State mState = State.StartOfLine;
+ private State mState;
///
/// Constructor.
///
- /// Reference back to generator, for output function and
- /// format options.
- /// Line label. Appears on first output line only.
- /// Opcode to use for all lines.
- /// End-of-line comment. Appears on first output line
- /// only.
+ /// Reference to text formatter.
/// String delimiter character.
- /// If true, no file output is produced.
- public StringGather(IGenerator gen, string label, string opcode,
- string comment, char delimiter, char delimReplace, ByteStyle byteStyle,
- int maxOperandLen, bool isTestRun) {
- Gen = gen;
- Label = label;
- Opcode = opcode;
- Comment = comment;
+ /// How to format raw byte data.
+ /// Maximum line length.
+ /// Character conversion delegate.
+        public StringOpFormatter(Formatter formatter, char delimiter, RawOutputStyle byteStyle,
+                int maxOperandLen, CharEncoding.Convert charConv) {
             Delimiter = delimiter;
-            DelimiterReplacement = delimReplace;
-            ByteStyleX = byteStyle;
+            RawStyle = byteStyle;
             MaxOperandLen = maxOperandLen;
-            IsTestRun = isTestRun;
+            CharConv = charConv;
             mBuffer = new char[MaxOperandLen];
-            mHexChars = Gen.SourceFormatter.HexDigits;
+            mHexChars = formatter.HexDigits;
+            Lines = new List<string>();
+            // Reset() clears Lines, so the list has to exist before it is called.
+            Reset();
+        }
+
+        /// <summary>
+        /// Returns the formatter to its start-of-line state and discards any lines
+        /// generated so far, so the same instance can be fed a second time.
+        /// </summary>
+        public void Reset() {
+            mState = State.StartOfLine;
+            mIndex = 0;
+            Lines.Clear();
+            // HasEscapedText is an output of a run, just like Lines; clear it as well so
+            // a flag raised by the previous run doesn't carry over into the next one.
+            HasEscapedText = false;
+        }
///
- /// Write a character into the buffer.
+ /// Write a character into the buffer. If the character matches the delimiter, or
+ /// isn't printable, the raw character value will be written as a byte instead.
///
- /// Character to add.
- public void WriteChar(char ch) {
- Debug.Assert(ch >= 0 && ch <= 0xff);
- if (ch == Delimiter) {
+ /// Raw character value.
+ public void WriteChar(byte rawCh) {
+ Debug.Assert(mState != State.Finished);
+
+ char ch = CharConv(rawCh);
+ if (ch == Delimiter || ch == CharEncoding.UNPRINTABLE_CHAR) {
// Must write it as a byte.
- HasDelimiter = true;
- WriteByte((byte)DelimiterReplacement);
+ WriteByte(rawCh);
return;
}
@@ -146,6 +145,10 @@ namespace SourceGen.AsmGen {
///
/// Value to add.
public void WriteByte(byte val) {
+ Debug.Assert(mState != State.Finished);
+
+ HasEscapedText = true;
+
// If we're at the start of a line, just output the byte.
// If we're inside quotes, emit a delimiter, comma, and the byte. We must
// have space for four (DenseHex) or five (CommaSep) chars.
@@ -155,7 +158,7 @@ namespace SourceGen.AsmGen {
case State.StartOfLine:
break;
case State.InQuote:
- int minWidth = (ByteStyleX == ByteStyle.CommaSep) ? 5 : 4;
+ int minWidth = (RawStyle == RawOutputStyle.CommaSep) ? 5 : 4;
if (mIndex + minWidth > MaxOperandLen) {
Flush();
} else {
@@ -164,11 +167,11 @@ namespace SourceGen.AsmGen {
}
break;
case State.OutQuote:
- minWidth = (ByteStyleX == ByteStyle.CommaSep) ? 4 : 2;
+ minWidth = (RawStyle == RawOutputStyle.CommaSep) ? 4 : 2;
if (mIndex + minWidth > MaxOperandLen) {
Flush();
} else {
- if (ByteStyleX == ByteStyle.CommaSep) {
+ if (RawStyle == RawOutputStyle.CommaSep) {
mBuffer[mIndex++] = ',';
}
}
@@ -178,7 +181,7 @@ namespace SourceGen.AsmGen {
break;
}
- if (ByteStyleX == ByteStyle.CommaSep) {
+ if (RawStyle == RawOutputStyle.CommaSep) {
mBuffer[mIndex++] = '$';
}
mBuffer[mIndex++] = mHexChars[val >> 4];
@@ -202,26 +205,65 @@ namespace SourceGen.AsmGen {
// empty string; put out a pair of delimiters
mBuffer[mIndex++] = Delimiter;
mBuffer[mIndex++] = Delimiter;
- NumLinesOutput++;
break;
case State.InQuote:
// add delimiter and finish
mBuffer[mIndex++] = Delimiter;
- NumLinesOutput++;
break;
case State.OutQuote:
// just output it
- NumLinesOutput++;
break;
}
- if (!IsTestRun) {
- Gen.OutputLine(Label, Opcode, new string(mBuffer, 0, mIndex),
- Comment);
- }
- mIndex = 0;
- // Erase these after first use so we don't put them on every line.
- Label = Comment = string.Empty;
+ string newStr = new string(mBuffer, 0, mIndex);
+ Debug.Assert(newStr.Length <= MaxOperandLen);
+ Lines.Add(newStr);
+
+ mState = State.Finished;
+
+ mIndex = 0;
+ }
+
+        /// <summary>
+        /// Feeds a run of bytes through the formatter, then calls Finish().  The
+        /// formatted output is left in Lines.
+        /// </summary>
+        /// <param name="data">Buffer holding the string data.</param>
+        /// <param name="offset">Offset of the first byte to process.</param>
+        /// <param name="length">Number of bytes to process, including the leading
+        ///   bytes.</param>
+        /// <param name="leadingBytes">Number of bytes at the start that are passed to
+        ///   WriteByte() rather than WriteChar().</param>
+        /// <param name="reverse">If set, the characters are written in reverse order,
+        ///   one line-sized chunk at a time.</param>
+        public void FeedBytes(byte[] data, int offset, int length, int leadingBytes,
+                bool reverse) {
+            int strEndOffset = offset + length;
+
+            // Write leading bytes.  This is used for the 8- or 16-bit length (when no
+            // appropriate pseudo-op is available), because we want to output that as hex
+            // even if it maps to a printable character.
+            while (leadingBytes-- > 0) {
+                WriteByte(data[offset++]);
+            }
+            if (reverse) {
+                // Max per line is line length minus the two delimiters.  We don't allow
+                // any hex quoting in reversed text, so this always works.  (If somebody
+                // does try to reverse text with delimiters or unprintable chars, we'll
+                // blow out the line limit, but for a cross-assembler that should be purely
+                // cosmetic.)
+                //
+                // Chunking is based on "offset", which now points just past the leading
+                // bytes, so those bytes are not written a second time as characters.
+                int maxPerLine = MaxOperandLen - 2;
+                Debug.Assert(maxPerLine > 0);
+                int charCount = strEndOffset - offset;
+                int numBlockLines = (charCount + maxPerLine - 1) / maxPerLine;
+
+                for (int chunk = 0; chunk < numBlockLines; chunk++) {
+                    int chunkOffset = offset + chunk * maxPerLine;
+                    int endOffset = Math.Min(chunkOffset + maxPerLine, strEndOffset);
+                    for (int off = endOffset - 1; off >= chunkOffset; off--) {
+                        WriteChar(data[off]);
+                    }
+                }
+            } else {
+                for (; offset < strEndOffset; offset++) {
+                    WriteChar(data[offset]);
+                }
+            }
+
+            Finish();
         }
}
}
diff --git a/SourceGen/AsmGen/AsmAcme.cs b/SourceGen/AsmGen/AsmAcme.cs
index 37d73f2..6f08de9 100644
--- a/SourceGen/AsmGen/AsmAcme.cs
+++ b/SourceGen/AsmGen/AsmAcme.cs
@@ -542,74 +542,25 @@ namespace SourceGen.AsmGen {
bool highAscii = false;
int leadingBytes = 0;
- int trailingBytes = 0;
- bool showLeading = false;
- bool showTrailing = false;
switch (dfd.FormatType) {
case FormatDescriptor.Type.StringGeneric:
- highAscii = (data[offset] & 0x80) != 0;
- break;
+ case FormatDescriptor.Type.StringReverse:
+ case FormatDescriptor.Type.StringNullTerm:
case FormatDescriptor.Type.StringDci:
highAscii = (data[offset] & 0x80) != 0;
- trailingBytes = 1;
- showTrailing = true;
- break;
- case FormatDescriptor.Type.StringReverse:
- highAscii = (data[offset] & 0x80) != 0;
- break;
- case FormatDescriptor.Type.StringNullTerm:
- highAscii = (data[offset] & 0x80) != 0;
- trailingBytes = 1;
- showTrailing = true;
break;
case FormatDescriptor.Type.StringL8:
if (dfd.Length > 1) {
highAscii = (data[offset + 1] & 0x80) != 0;
}
leadingBytes = 1;
- showLeading = true;
break;
case FormatDescriptor.Type.StringL16:
if (dfd.Length > 2) {
highAscii = (data[offset + 2] & 0x80) != 0;
}
leadingBytes = 2;
- showLeading = true;
- break;
- default:
- Debug.Assert(false);
- return;
- }
-
- char delim = '"';
- StringGather gath = null;
-
- // Run the string through so we can see if it'll fit on one line. As a minor
- // optimization, we skip this step for "generic" strings, which are probably
- // the most common thing.
- if (dfd.FormatSubType != FormatDescriptor.SubType.None || highAscii) {
- gath = new StringGather(this, labelStr, "???", commentStr, delim,
- delim, StringGather.ByteStyle.CommaSep, MAX_OPERAND_LEN, true);
- FeedGath(gath, data, offset, dfd.Length, leadingBytes, showLeading,
- trailingBytes, showTrailing);
- Debug.Assert(gath.NumLinesOutput > 0);
- }
-
- string opcodeStr = formatter.FormatPseudoOp(sDataOpNames.StrGeneric);
-
- switch (dfd.FormatType) {
- case FormatDescriptor.Type.StringGeneric:
- // TODO(someday): something fancy with encodings to handle high-ASCII text?
- break;
- case FormatDescriptor.Type.StringDci:
- case FormatDescriptor.Type.StringReverse:
- // Fully configured above.
- break;
- case FormatDescriptor.Type.StringNullTerm:
- case FormatDescriptor.Type.StringL8:
- case FormatDescriptor.Type.StringL16:
- // Implement as macro?
break;
default:
Debug.Assert(false);
@@ -621,35 +572,16 @@ namespace SourceGen.AsmGen {
return;
}
- // Create a new StringGather, with the final opcode choice.
- gath = new StringGather(this, labelStr, opcodeStr, commentStr, delim,
- delim, StringGather.ByteStyle.CommaSep, MAX_OPERAND_LEN, false);
- FeedGath(gath, data, offset, dfd.Length, leadingBytes, showLeading,
- trailingBytes, showTrailing);
- }
+ StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, '"',
+ StringOpFormatter.RawOutputStyle.CommaSep, MAX_OPERAND_LEN,
+ CharEncoding.ConvertLowAscii);
+ stropf.FeedBytes(data, offset, dfd.Length, leadingBytes, false);
- ///
- /// Feeds the bytes into the StringGather.
- ///
- private void FeedGath(StringGather gath, byte[] data, int offset, int length,
- int leadingBytes, bool showLeading, int trailingBytes, bool showTrailing) {
- int startOffset = offset;
- int strEndOffset = offset + length - trailingBytes;
-
- if (showLeading) {
- while (leadingBytes-- > 0) {
- gath.WriteByte(data[offset++]);
- }
- } else {
- offset += leadingBytes;
+ string opcodeStr = formatter.FormatPseudoOp(sDataOpNames.StrGeneric);
+ foreach (string str in stropf.Lines) {
+ OutputLine(labelStr, opcodeStr, str, commentStr);
+ labelStr = commentStr = string.Empty; // only show on first
}
- for (; offset < strEndOffset; offset++) {
- gath.WriteChar((char)(data[offset] & 0x7f));
- }
- while (showTrailing && trailingBytes-- > 0) {
- gath.WriteByte(data[offset++]);
- }
- gath.Finish();
}
}
diff --git a/SourceGen/AsmGen/AsmCc65.cs b/SourceGen/AsmGen/AsmCc65.cs
index a8fd438..1a4164b 100644
--- a/SourceGen/AsmGen/AsmCc65.cs
+++ b/SourceGen/AsmGen/AsmCc65.cs
@@ -598,135 +598,89 @@ namespace SourceGen.AsmGen {
bool highAscii = false;
int leadingBytes = 0;
int trailingBytes = 0;
- bool showLeading = false;
- bool showTrailing = false;
switch (dfd.FormatType) {
case FormatDescriptor.Type.StringGeneric:
- highAscii = (data[offset] & 0x80) != 0;
- break;
- case FormatDescriptor.Type.StringDci:
- highAscii = (data[offset] & 0x80) != 0;
- trailingBytes = 1;
- showTrailing = true;
- break;
case FormatDescriptor.Type.StringReverse:
+ case FormatDescriptor.Type.StringDci:
highAscii = (data[offset] & 0x80) != 0;
break;
case FormatDescriptor.Type.StringNullTerm:
highAscii = (data[offset] & 0x80) != 0;
trailingBytes = 1;
- showTrailing = true;
break;
case FormatDescriptor.Type.StringL8:
if (dfd.Length > 1) {
highAscii = (data[offset + 1] & 0x80) != 0;
}
leadingBytes = 1;
- showLeading = true;
break;
case FormatDescriptor.Type.StringL16:
if (dfd.Length > 2) {
highAscii = (data[offset + 2] & 0x80) != 0;
}
leadingBytes = 2;
- showLeading = true;
break;
default:
Debug.Assert(false);
return;
}
- char delim = '"';
- StringGather gath = null;
-
- // Run the string through so we can see if it'll fit on one line. As a minor
- // optimization, we skip this step for "generic" strings, which are probably
- // the most common thing.
- if (dfd.FormatSubType != FormatDescriptor.SubType.None || highAscii) {
- gath = new StringGather(this, labelStr, "???", commentStr, delim,
- delim, StringGather.ByteStyle.CommaSep, MAX_OPERAND_LEN, true);
- FeedGath(gath, data, offset, dfd.Length, leadingBytes, showLeading,
- trailingBytes, showTrailing);
- Debug.Assert(gath.NumLinesOutput > 0);
- }
-
- string opcodeStr = formatter.FormatPseudoOp(sDataOpNames.StrGeneric);
-
- switch (dfd.FormatType) {
- case FormatDescriptor.Type.StringGeneric:
- // Special case for simple short high-ASCII strings. These have no
- // leading or trailing bytes. We can improve this a bit by handling
- // arbitrarily long strings by simply breaking them across lines.
- Debug.Assert(leadingBytes == 0);
- Debug.Assert(trailingBytes == 0);
- if (highAscii && gath.NumLinesOutput == 1 && !gath.HasDelimiter) {
- if (!mHighAsciiMacroOutput) {
- mHighAsciiMacroOutput = true;
- // Output a macro for high-ASCII strings.
- OutputLine(".macro", "HiAscii", "Arg", string.Empty);
- OutputLine(string.Empty, ".repeat", ".strlen(Arg), I", string.Empty);
- OutputLine(string.Empty, ".byte", ".strat(Arg, I) | $80", string.Empty);
- OutputLine(string.Empty, ".endrep", string.Empty, string.Empty);
- OutputLine(".endmacro", string.Empty, string.Empty, string.Empty);
- }
- opcodeStr = formatter.FormatPseudoOp("HiAscii");
- highAscii = false;
- }
- break;
- case FormatDescriptor.Type.StringDci:
- case FormatDescriptor.Type.StringReverse:
- // Full configured above.
- break;
- case FormatDescriptor.Type.StringNullTerm:
- if (gath.NumLinesOutput == 1 && !gath.HasDelimiter) {
- opcodeStr = sDataOpNames.StrNullTerm;
- showTrailing = false;
- }
- break;
- case FormatDescriptor.Type.StringL8:
- case FormatDescriptor.Type.StringL16:
- // Implement macros?
- break;
- default:
- Debug.Assert(false);
- return;
- }
-
- if (highAscii) {
+ if (highAscii && dfd.FormatType != FormatDescriptor.Type.StringGeneric) {
OutputNoJoy(offset, dfd.Length, labelStr, commentStr);
return;
}
- // Create a new StringGather, with the final opcode choice.
- gath = new StringGather(this, labelStr, opcodeStr, commentStr, delim,
- delim, StringGather.ByteStyle.CommaSep, MAX_OPERAND_LEN, false);
- FeedGath(gath, data, offset, dfd.Length, leadingBytes, showLeading,
- trailingBytes, showTrailing);
- }
-
- ///
- /// Feeds the bytes into the StringGather.
- ///
- private void FeedGath(StringGather gath, byte[] data, int offset, int length,
- int leadingBytes, bool showLeading, int trailingBytes, bool showTrailing) {
- int startOffset = offset;
- int strEndOffset = offset + length - trailingBytes;
-
- if (showLeading) {
- while (leadingBytes-- > 0) {
- gath.WriteByte(data[offset++]);
- }
+ CharEncoding.Convert charConv;
+ if (highAscii) {
+ charConv = CharEncoding.ConvertHighAscii;
} else {
- offset += leadingBytes;
+ charConv = CharEncoding.ConvertLowAscii;
}
- for (; offset < strEndOffset; offset++) {
- gath.WriteChar((char)(data[offset] & 0x7f));
+
+ StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, '"',
+ StringOpFormatter.RawOutputStyle.CommaSep, MAX_OPERAND_LEN,
+ charConv);
+ stropf.FeedBytes(data, offset, dfd.Length - trailingBytes, leadingBytes, false);
+
+ string opcodeStr = formatter.FormatPseudoOp(sDataOpNames.StrGeneric);
+
+ if (highAscii) {
+ // Does this fit the narrow definition of what we can do with a macro?
+ Debug.Assert(dfd.FormatType == FormatDescriptor.Type.StringGeneric);
+ if (stropf.Lines.Count == 1 && !stropf.HasEscapedText) {
+ if (!mHighAsciiMacroOutput) {
+ mHighAsciiMacroOutput = true;
+ // Output a macro for high-ASCII strings.
+ OutputLine(".macro", "HiAscii", "Arg", string.Empty);
+ OutputLine(string.Empty, ".repeat", ".strlen(Arg), I", string.Empty);
+ OutputLine(string.Empty, ".byte", ".strat(Arg, I) | $80", string.Empty);
+ OutputLine(string.Empty, ".endrep", string.Empty, string.Empty);
+ OutputLine(".endmacro", string.Empty, string.Empty, string.Empty);
+ }
+ opcodeStr = formatter.FormatPseudoOp("HiAscii");
+ } else {
+ // didn't work out, dump hex
+ OutputNoJoy(offset, dfd.Length, labelStr, commentStr);
+ return;
+ }
}
- while (showTrailing && trailingBytes-- > 0) {
- gath.WriteByte(data[offset++]);
+
+ if (dfd.FormatType == FormatDescriptor.Type.StringNullTerm) {
+ if (stropf.Lines.Count == 1 && !stropf.HasEscapedText) {
+ // Keep it.
+ opcodeStr = sDataOpNames.StrNullTerm;
+ } else {
+ // Didn't fit, so re-emit it, this time with the terminating null byte.
+ stropf.Reset();
+ stropf.FeedBytes(data, offset, dfd.Length, leadingBytes, false);
+ }
+ }
+
+ foreach (string str in stropf.Lines) {
+ OutputLine(labelStr, opcodeStr, str, commentStr);
+ labelStr = commentStr = string.Empty; // only show on first
}
- gath.Finish();
}
}
diff --git a/SourceGen/AsmGen/AsmMerlin32.cs b/SourceGen/AsmGen/AsmMerlin32.cs
index c308979..08c42c4 100644
--- a/SourceGen/AsmGen/AsmMerlin32.cs
+++ b/SourceGen/AsmGen/AsmMerlin32.cs
@@ -448,8 +448,6 @@ namespace SourceGen.AsmGen {
}
- private enum RevMode { Forward, Reverse, BlockReverse };
-
private void OutputString(int offset, string labelStr, string commentStr) {
// This gets complicated.
//
@@ -467,6 +465,11 @@ namespace SourceGen.AsmGen {
// For aesthetic purposes, zero-length CString, L8String, and L16String
// should be output as DFB/DW zeroes rather than an empty string -- makes
// it easier to read.
+ //
+ // NOTE: we generally assume that the input is in the correct format, e.g.
+ // the length byte in a StringL8 matches dfd.Length, and the high bits in DCI strings
+ // have the right pattern. If not, we will generate bad output. This would need
+ // to be scanned and corrected at a higher level.
Formatter formatter = SourceFormatter;
byte[] data = Project.FileData;
@@ -477,127 +480,115 @@ namespace SourceGen.AsmGen {
Debug.Assert(dfd.Length > 0);
bool highAscii = false;
- int showZeroes = 0;
+ bool reverse = false;
int leadingBytes = 0;
- int trailingBytes = 0;
- bool showLeading = false;
- bool showTrailing = false;
- RevMode revMode = RevMode.Forward;
+ string opcodeStr;
switch (dfd.FormatType) {
case FormatDescriptor.Type.StringGeneric:
- highAscii = (data[offset] & 0x80) != 0;
- break;
- case FormatDescriptor.Type.StringDci:
+ opcodeStr = sDataOpNames.StrGeneric;
highAscii = (data[offset] & 0x80) != 0;
break;
case FormatDescriptor.Type.StringReverse:
+ opcodeStr = sDataOpNames.StrReverse;
highAscii = (data[offset] & 0x80) != 0;
- revMode = RevMode.Reverse;
+ reverse = true;
break;
case FormatDescriptor.Type.StringNullTerm:
+ opcodeStr = sDataOpNames.StrGeneric; // no pseudo-op for this
highAscii = (data[offset] & 0x80) != 0;
if (dfd.Length == 1) {
- showZeroes = 1; // empty null-terminated string
+ // Empty string. Just output the length byte(s) or null terminator.
+ GenerateShortSequence(offset, 1, out string opcode, out string operand);
+ OutputLine(labelStr, opcode, operand, commentStr);
+ return;
}
- trailingBytes = 1;
- showTrailing = true;
break;
case FormatDescriptor.Type.StringL8:
+ opcodeStr = sDataOpNames.StrLen8;
if (dfd.Length > 1) {
highAscii = (data[offset + 1] & 0x80) != 0;
- } else {
- //showZeroes = 1;
}
leadingBytes = 1;
break;
case FormatDescriptor.Type.StringL16:
+ opcodeStr = sDataOpNames.StrLen16;
if (dfd.Length > 2) {
highAscii = (data[offset + 2] & 0x80) != 0;
- } else {
- //showZeroes = 2;
}
leadingBytes = 2;
break;
+ case FormatDescriptor.Type.StringDci:
+ opcodeStr = sDataOpNames.StrDci;
+ highAscii = (data[offset] & 0x80) != 0;
+ break;
default:
Debug.Assert(false);
return;
}
- if (showZeroes != 0) {
- // Empty string. Just output the length byte(s) or null terminator.
- GenerateShortSequence(offset, showZeroes, out string opcode, out string operand);
- OutputLine(labelStr, opcode, operand, commentStr);
- return;
- }
-
// Merlin 32 uses single-quote for low ASCII, double-quote for high ASCII. When
// quoting the delimiter we use a hexadecimal value. We need to bear in mind that
// we're forcing the characters to low ASCII, but the actual character being
// escaped might be in high ASCII. Hence delim vs. delimReplace.
char delim = highAscii ? '"' : '\'';
- char delimReplace = highAscii ? ((char)(delim | 0x80)) : delim;
- StringGather gath = null;
-
- // Run the string through so we can see if it'll fit on one line. As a minor
- // optimization, we skip this step for "generic" strings, which are probably
- // the most common thing.
- if (dfd.FormatSubType != FormatDescriptor.SubType.None) {
- gath = new StringGather(this, labelStr, "???", commentStr, delim,
- delimReplace, StringGather.ByteStyle.DenseHex, MAX_OPERAND_LEN, true);
- FeedGath(gath, data, offset, dfd.Length, revMode, leadingBytes, showLeading,
- trailingBytes, showTrailing);
- Debug.Assert(gath.NumLinesOutput > 0);
+ CharEncoding.Convert charConv;
+ if (highAscii) {
+ charConv = CharEncoding.ConvertHighAscii;
+ } else {
+ charConv = CharEncoding.ConvertLowAscii;
}
- string opcodeStr;
+ StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, delim,
+ StringOpFormatter.RawOutputStyle.DenseHex, MAX_OPERAND_LEN, charConv);
+ if (dfd.FormatType == FormatDescriptor.Type.StringDci) {
+ // DCI is awkward because the character encoding flips on the last byte. Rather
+ // than clutter up StringOpFormatter for this rare item, we just accept both
+ // throughout.
+ stropf.CharConv = CharEncoding.ConvertLowAndHighAscii;
+ }
+ // Feed bytes in, skipping over the leading length bytes.
+ stropf.FeedBytes(data, offset + leadingBytes,
+ dfd.Length - leadingBytes, 0, reverse);
+ Debug.Assert(stropf.Lines.Count > 0);
+
+ // See if we need to do this over.
+ bool redo = false;
switch (dfd.FormatType) {
case FormatDescriptor.Type.StringGeneric:
- opcodeStr = highAscii ? sDataOpNames.StrGenericHi : sDataOpNames.StrGeneric;
- break;
- case FormatDescriptor.Type.StringDci:
- if (gath.NumLinesOutput == 1) {
- opcodeStr = highAscii ? sDataOpNames.StrDciHi : sDataOpNames.StrDci;
- } else {
- opcodeStr = highAscii ? sDataOpNames.StrGenericHi : sDataOpNames.StrGeneric;
- trailingBytes = 1;
- showTrailing = true;
- }
+ case FormatDescriptor.Type.StringNullTerm:
break;
case FormatDescriptor.Type.StringReverse:
- if (gath.HasDelimiter) {
- // can't include escaped delimiters in REV
- opcodeStr = highAscii ? sDataOpNames.StrGenericHi : sDataOpNames.StrGeneric;
- revMode = RevMode.Forward;
- } else if (gath.NumLinesOutput > 1) {
- opcodeStr = highAscii ? sDataOpNames.StrReverseHi : sDataOpNames.StrReverse;
- revMode = RevMode.BlockReverse;
- } else {
- opcodeStr = highAscii ? sDataOpNames.StrReverseHi : sDataOpNames.StrReverse;
- Debug.Assert(revMode == RevMode.Reverse);
+ if (stropf.HasEscapedText) {
+ // can't include escaped characters in REV
+ opcodeStr = sDataOpNames.StrGeneric;
+ reverse = false;
+ redo = true;
}
break;
- case FormatDescriptor.Type.StringNullTerm:
- //opcodeStr = sDataOpNames.StrNullTerm[highAscii ? 1 : 0];
- opcodeStr = highAscii ? sDataOpNames.StrGenericHi : sDataOpNames.StrGeneric;
- break;
case FormatDescriptor.Type.StringL8:
- if (gath.NumLinesOutput == 1) {
- opcodeStr = highAscii ? sDataOpNames.StrLen8Hi : sDataOpNames.StrLen8;
- } else {
- opcodeStr = highAscii ? sDataOpNames.StrGenericHi : sDataOpNames.StrGeneric;
+ if (stropf.Lines.Count != 1) {
+ // single-line only
+ opcodeStr = sDataOpNames.StrGeneric;
leadingBytes = 1;
- showLeading = true;
+ redo = true;
}
break;
case FormatDescriptor.Type.StringL16:
- if (gath.NumLinesOutput == 1) {
- opcodeStr = highAscii ? sDataOpNames.StrLen16Hi : sDataOpNames.StrLen16;
- } else {
- opcodeStr = highAscii ? sDataOpNames.StrGenericHi : sDataOpNames.StrGeneric;
+ if (stropf.Lines.Count != 1) {
+ // single-line only
+ opcodeStr = sDataOpNames.StrGeneric;
leadingBytes = 2;
- showLeading = true;
+ redo = true;
+ }
+ break;
+ case FormatDescriptor.Type.StringDci:
+ if (stropf.Lines.Count != 1) {
+ // single-line only
+ opcodeStr = sDataOpNames.StrGeneric;
+ stropf.CharConv = charConv;
+ redo = true;
}
break;
default:
@@ -605,61 +596,21 @@ namespace SourceGen.AsmGen {
return;
}
+ if (redo) {
+ //Debug.WriteLine("REDO off=+" + offset.ToString("x6") + ": " + dfd.FormatType);
+
+ // This time, instead of skipping over leading length bytes, we include them
+ // explicitly.
+ stropf.Reset();
+ stropf.FeedBytes(data, offset, dfd.Length, leadingBytes, reverse);
+ }
+
opcodeStr = formatter.FormatPseudoOp(opcodeStr);
- // Create a new StringGather, with the final opcode choice.
- gath = new StringGather(this, labelStr, opcodeStr, commentStr, delim,
- delimReplace, StringGather.ByteStyle.DenseHex, MAX_OPERAND_LEN, false);
- FeedGath(gath, data, offset, dfd.Length, revMode, leadingBytes, showLeading,
- trailingBytes, showTrailing);
- }
-
- ///
- /// Feeds the bytes into the StringGather.
- ///
- private void FeedGath(StringGather gath, byte[] data, int offset, int length,
- RevMode revMode, int leadingBytes, bool showLeading,
- int trailingBytes, bool showTrailing) {
- int startOffset = offset;
- int strEndOffset = offset + length - trailingBytes;
-
- if (showLeading) {
- while (leadingBytes-- > 0) {
- gath.WriteByte(data[offset++]);
- }
- } else {
- offset += leadingBytes;
+ foreach (string str in stropf.Lines) {
+ OutputLine(labelStr, opcodeStr, str, commentStr);
+ labelStr = commentStr = string.Empty; // only show on first
}
- if (revMode == RevMode.BlockReverse) {
- const int maxPerLine = MAX_OPERAND_LEN - 2;
- int numBlockLines = (length + maxPerLine - 1) / maxPerLine;
-
- for (int chunk = 0; chunk < numBlockLines; chunk++) {
- int chunkOffset = startOffset + chunk * maxPerLine;
- int endOffset = chunkOffset + maxPerLine;
- if (endOffset > strEndOffset) {
- endOffset = strEndOffset;
- }
- for (int off = endOffset - 1; off >= chunkOffset; off--) {
- gath.WriteChar((char)(data[off] & 0x7f));
- }
- }
- } else {
- for (; offset < strEndOffset; offset++) {
- if (revMode == RevMode.Forward) {
- gath.WriteChar((char)(data[offset] & 0x7f));
- } else if (revMode == RevMode.Reverse) {
- int posn = startOffset + (strEndOffset - offset) - 1;
- gath.WriteChar((char)(data[posn] & 0x7f));
- } else {
- Debug.Assert(false);
- }
- }
- }
- while (showTrailing && trailingBytes-- > 0) {
- gath.WriteByte(data[offset++]);
- }
- gath.Finish();
}
}
diff --git a/SourceGen/AsmGen/AsmTass64.cs b/SourceGen/AsmGen/AsmTass64.cs
index 1bc7fe2..55035c4 100644
--- a/SourceGen/AsmGen/AsmTass64.cs
+++ b/SourceGen/AsmGen/AsmTass64.cs
@@ -533,85 +533,36 @@ namespace SourceGen.AsmGen {
Debug.Assert(dfd.Length > 0);
bool highAscii = false;
- int leadingBytes = 0;
+ int hiddenLeadingBytes = 0;
+ int shownLeadingBytes = 0;
int trailingBytes = 0;
- bool showLeading = false;
- bool showTrailing = false;
+ string opcodeStr;
switch (dfd.FormatType) {
case FormatDescriptor.Type.StringGeneric:
- highAscii = (data[offset] & 0x80) != 0;
- break;
- case FormatDescriptor.Type.StringDci:
- highAscii = (data[offset] & 0x80) != 0;
- trailingBytes = 1;
- showTrailing = true;
- break;
case FormatDescriptor.Type.StringReverse:
+ case FormatDescriptor.Type.StringDci:
+ opcodeStr = sDataOpNames.StrGeneric;
highAscii = (data[offset] & 0x80) != 0;
break;
case FormatDescriptor.Type.StringNullTerm:
+ opcodeStr = sDataOpNames.StrNullTerm;
highAscii = (data[offset] & 0x80) != 0;
trailingBytes = 1;
- showTrailing = true;
break;
case FormatDescriptor.Type.StringL8:
+ opcodeStr = sDataOpNames.StrLen8;
if (dfd.Length > 1) {
highAscii = (data[offset + 1] & 0x80) != 0;
}
- leadingBytes = 1;
- showLeading = true;
+ hiddenLeadingBytes = 1;
break;
case FormatDescriptor.Type.StringL16:
+ opcodeStr = sDataOpNames.StrGeneric;
if (dfd.Length > 2) {
highAscii = (data[offset + 2] & 0x80) != 0;
}
- leadingBytes = 2;
- showLeading = true;
- break;
- default:
- Debug.Assert(false);
- return;
- }
-
- char delim = '"';
- StringGather gath = null;
-
- // Run the string through so we can see if it'll fit on one line. As a minor
- // optimization, we skip this step for "generic" strings, which are probably
- // the most common thing.
- if (dfd.FormatSubType != FormatDescriptor.SubType.None || highAscii) {
- gath = new StringGather(this, labelStr, "???", commentStr, delim,
- delim, StringGather.ByteStyle.CommaSep, MAX_OPERAND_LEN, true);
- FeedGath(gath, data, offset, dfd.Length, leadingBytes, showLeading,
- trailingBytes, showTrailing);
- Debug.Assert(gath.NumLinesOutput > 0);
- }
-
- string opcodeStr = formatter.FormatPseudoOp(sDataOpNames.StrGeneric);
-
- switch (dfd.FormatType) {
- case FormatDescriptor.Type.StringGeneric:
- // TODO(someday): something fancy with encodings to handle high-ASCII text?
- break;
- case FormatDescriptor.Type.StringDci:
- case FormatDescriptor.Type.StringReverse:
- // Fully configured above.
- break;
- case FormatDescriptor.Type.StringNullTerm:
- if (gath.NumLinesOutput == 1 && !gath.HasDelimiter) {
- opcodeStr = sDataOpNames.StrNullTerm;
- showTrailing = false;
- }
- break;
- case FormatDescriptor.Type.StringL8:
- if (gath.NumLinesOutput == 1 && !gath.HasDelimiter) {
- opcodeStr = sDataOpNames.StrLen8;
- showLeading = false;
- }
- break;
- case FormatDescriptor.Type.StringL16:
- // Implement as macro?
+ shownLeadingBytes = 2;
break;
default:
Debug.Assert(false);
@@ -623,35 +574,58 @@ namespace SourceGen.AsmGen {
return;
}
- // Create a new StringGather, with the final opcode choice.
- gath = new StringGather(this, labelStr, opcodeStr, commentStr, delim,
- delim, StringGather.ByteStyle.CommaSep, MAX_OPERAND_LEN, false);
- FeedGath(gath, data, offset, dfd.Length, leadingBytes, showLeading,
- trailingBytes, showTrailing);
- }
+ StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, '"',
+ StringOpFormatter.RawOutputStyle.CommaSep, MAX_OPERAND_LEN,
+ CharEncoding.ConvertLowAscii);
- ///
- /// Feeds the bytes into the StringGather.
- ///
- private void FeedGath(StringGather gath, byte[] data, int offset, int length,
- int leadingBytes, bool showLeading, int trailingBytes, bool showTrailing) {
- int startOffset = offset;
- int strEndOffset = offset + length - trailingBytes;
+ // Feed bytes in, skipping over hidden bytes (leading L8, trailing null).
+ stropf.FeedBytes(data, offset + hiddenLeadingBytes,
+ dfd.Length - hiddenLeadingBytes - trailingBytes, shownLeadingBytes, false);
+ Debug.Assert(stropf.Lines.Count > 0);
- if (showLeading) {
- while (leadingBytes-- > 0) {
- gath.WriteByte(data[offset++]);
- }
- } else {
- offset += leadingBytes;
+ // See if we need to do this over.
+ bool redo = false;
+ switch (dfd.FormatType) {
+ case FormatDescriptor.Type.StringGeneric:
+ case FormatDescriptor.Type.StringReverse:
+ case FormatDescriptor.Type.StringL16:
+ case FormatDescriptor.Type.StringDci:
+ // All good the first time.
+ break;
+ case FormatDescriptor.Type.StringNullTerm:
+ if (stropf.Lines.Count != 1 || stropf.HasEscapedText) {
+ // Must be single-line without quoted chars.
+ opcodeStr = sDataOpNames.StrGeneric;
+ redo = true;
+ }
+ break;
+ case FormatDescriptor.Type.StringL8:
+ if (stropf.Lines.Count != 1 || stropf.HasEscapedText) {
+ // Must be single-line without quoted chars.
+ opcodeStr = sDataOpNames.StrGeneric;
+ redo = true;
+ }
+ break;
+ default:
+ Debug.Assert(false);
+ return;
}
- for (; offset < strEndOffset; offset++) {
- gath.WriteChar((char)(data[offset] & 0x7f));
+
+ if (redo) {
+ //Debug.WriteLine("REDO off=+" + offset.ToString("x6") + ": " + dfd.FormatType);
+
+ // This time, instead of skipping over leading length bytes, we include them
+ // explicitly.
+ stropf.Reset();
+ stropf.FeedBytes(data, offset, dfd.Length, hiddenLeadingBytes, false);
}
- while (showTrailing && trailingBytes-- > 0) {
- gath.WriteByte(data[offset++]);
+
+ opcodeStr = formatter.FormatPseudoOp(opcodeStr);
+
+ foreach (string str in stropf.Lines) {
+ OutputLine(labelStr, opcodeStr, str, commentStr);
+ labelStr = commentStr = string.Empty; // only show on first
}
- gath.Finish();
}
}
diff --git a/SourceGen/SourceGen.csproj b/SourceGen/SourceGen.csproj
index 619e1fd..0b48b8c 100644
--- a/SourceGen/SourceGen.csproj
+++ b/SourceGen/SourceGen.csproj
@@ -73,7 +73,6 @@
-