6502bench/Asm65/StringOpFormatter.cs

314 lines
12 KiB
C#

/*
* Copyright 2019 faddenSoft
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Diagnostics;
namespace Asm65 {
/// <summary>
/// String pseudo-op formatter. Handles character encoding conversion and quoting of
/// delimiters and non-printable characters.
/// </summary>
public class StringOpFormatter {
/// <summary>
/// Text direction. If text is stored in reverse order, we want to un-reverse it to
/// make it readable. This gets tricky for a multi-line item. For the assembler we
/// want to break it into lines and then reverse each chunk, but on screen we want to
/// reverse the entire thing as a single block.
/// </summary>
public enum ReverseMode { Forward, LineReverse, FullReverse };
/// <summary>
/// Character encoding conversion delegate. This function converts a raw byte value
/// to a printable value, or CharEncoding.UNPRINTABLE_CHAR.
/// </summary>
public CharEncoding.Convert CharConv { get; set; }
/// <summary>
/// True if the input bytes are a DCI string. Only compatible with ReverseMode==Forward.
/// </summary>
public bool IsDciString { get; set; } = false;
// Output format for raw (non-printable) characters. Most assemblers use comma-separated
// hex values, some allow dense hex strings.
public enum RawOutputStyle { DenseHex, CommaSep };
// Outputs.
public bool HasEscapedText { get; private set; }
public List<string> Lines { get; private set; }
private Formatter.DelimiterDef mDelimiterDef;
private RawOutputStyle mRawStyle;
private bool mBackslashEscapes;
private int mMaxOperandLen;
// Reference to array with 16 hex digits. (May be upper or lower case.)
private char[] mHexChars;
/// <summary>
/// Character collection buffer. The delimiters are written into the buffer
/// because they're mixed with bytes, particularly when we have to escape the
/// delimiter character. Strings might start or end with escaped delimiters,
/// so we don't add them until we have to.
/// </summary>
private char[] mBuffer;
/// <summary>
/// Next available character position.
/// </summary>
private int mIndex;
/// <summary>
/// State of the buffer, based on the last thing we added.
/// </summary>
private enum State {
Unknown = 0,
StartOfLine,
InQuote,
OutQuote,
Finished
}
private State mState;
/// <summary>
/// Constructor.
/// </summary>
/// <param name="formatter">Reference to text formatter.</param>
/// <param name="delimiterDef">String delimiter values.</param>
/// <param name="byteStyle">How to format raw byte data.</param>
/// <param name="charConv">Character conversion delegate.</param>
/// <param name="backslashEscapes">True if "\" must be escaped with "\\".</param>
public StringOpFormatter(Formatter formatter, Formatter.DelimiterDef delimiterDef,
RawOutputStyle byteStyle, CharEncoding.Convert charConv,
bool backslashEscapes) {
mDelimiterDef = delimiterDef;
mRawStyle = byteStyle;
CharConv = charConv;
mBackslashEscapes = backslashEscapes;
mMaxOperandLen = formatter.OperandWrapLen;
mHexChars = formatter.HexDigits;
mBuffer = new char[mMaxOperandLen];
Lines = new List<string>();
// suffix not used, so we don't expect it to be set to something
Debug.Assert(string.IsNullOrEmpty(mDelimiterDef.Suffix));
Reset();
}
public void Reset() {
mState = State.StartOfLine;
mIndex = 0;
Lines.Clear();
// Copy the prefix string into the buffer for the first line.
for (int i = 0; i < mDelimiterDef.Prefix.Length; i++) {
mBuffer[mIndex++] = mDelimiterDef.Prefix[i];
}
}
/// <summary>
/// Write a character into the buffer. If the character matches the delimiter, or
/// isn't printable, the raw character value will be written as a byte instead.
/// </summary>
/// <param name="rawCh">Raw character value.</param>
private void WriteChar(byte rawCh, bool recurOkay = true) {
Debug.Assert(mState != State.Finished);
char ch = CharConv(rawCh);
if (ch == mDelimiterDef.OpenDelim || ch == mDelimiterDef.CloseDelim ||
ch == CharEncoding.UNPRINTABLE_CHAR) {
// Must write it as a byte.
WriteByte(rawCh);
return;
} else if (ch == '\\' && recurOkay && mBackslashEscapes) {
// Recursively output two '\' instead of just one.
WriteChar(rawCh, false);
}
// If we're at the start of a line, add delimiter, then new char.
// If we're inside quotes, just add the character. We must have space for
// two chars (new char, close quote).
// If we're outside quotes, add a comma and delimiter, then the character.
// We must have 4 chars remaining (comma, open quote, new char, close quote).
switch (mState) {
case State.StartOfLine:
mBuffer[mIndex++] = mDelimiterDef.OpenDelim;
break;
case State.InQuote:
if (mIndex + 2 > mMaxOperandLen) {
Flush();
mBuffer[mIndex++] = mDelimiterDef.OpenDelim;
}
break;
case State.OutQuote:
if (mIndex + 4 > mMaxOperandLen) {
Flush();
mBuffer[mIndex++] = mDelimiterDef.OpenDelim;
} else {
mBuffer[mIndex++] = ',';
mBuffer[mIndex++] = mDelimiterDef.OpenDelim;
}
break;
default:
Debug.Assert(false);
break;
}
mBuffer[mIndex++] = ch;
mState = State.InQuote;
}
/// <summary>
/// Write a hex value into the buffer.
/// </summary>
/// <param name="val">Value to add.</param>
private void WriteByte(byte val) {
Debug.Assert(mState != State.Finished);
HasEscapedText = true;
// If we're at the start of a line, just output the byte.
// If we're inside quotes, emit a delimiter, comma, and the byte. We must
// have space for four (DenseHex) or five (CommaSep) chars.
// If we're outside quotes, add the byte. We must have two (DenseHex) or
// four (CommaSep) chars remaining.
switch (mState) {
case State.StartOfLine:
break;
case State.InQuote:
int minWidth = (mRawStyle == RawOutputStyle.CommaSep) ? 5 : 4;
if (mIndex + minWidth > mMaxOperandLen) {
Flush();
} else {
mBuffer[mIndex++] = mDelimiterDef.CloseDelim;
mBuffer[mIndex++] = ',';
}
break;
case State.OutQuote:
minWidth = (mRawStyle == RawOutputStyle.CommaSep) ? 4 : 2;
if (mIndex + minWidth > mMaxOperandLen) {
Flush();
} else {
if (mRawStyle == RawOutputStyle.CommaSep) {
mBuffer[mIndex++] = ',';
}
}
break;
default:
Debug.Assert(false);
break;
}
if (mRawStyle == RawOutputStyle.CommaSep) {
mBuffer[mIndex++] = '$';
}
mBuffer[mIndex++] = mHexChars[val >> 4];
mBuffer[mIndex++] = mHexChars[val & 0x0f];
mState = State.OutQuote;
}
/// <summary>
/// Tells the object to flush any pending data to the output.
/// </summary>
private void Finish() {
Flush();
}
/// <summary>
/// Outputs the buffer of pending data. A closing delimiter will be added if needed.
/// </summary>
private void Flush() {
switch (mState) {
case State.StartOfLine:
// empty string; put out a pair of delimiters
mBuffer[mIndex++] = mDelimiterDef.OpenDelim;
mBuffer[mIndex++] = mDelimiterDef.CloseDelim;
break;
case State.InQuote:
// add delimiter and finish
mBuffer[mIndex++] = mDelimiterDef.CloseDelim;
break;
case State.OutQuote:
// just output it
break;
}
string newStr = new string(mBuffer, 0, mIndex);
Debug.Assert(newStr.Length <= mMaxOperandLen);
Lines.Add(newStr);
mState = State.Finished;
mIndex = 0;
}
/// <summary>
/// Feeds the bytes into the StringGather.
/// </summary>
public void FeedBytes(byte[] data, int offset, int length, int leadingBytes,
ReverseMode revMode) {
Debug.Assert(!IsDciString || revMode == ReverseMode.Forward);
int startOffset = offset;
int strEndOffset = offset + length;
// Write leading bytes. This is used for the 8- or 16-bit length (when no
// appropriate pseudo-op is available), because we want to output that as hex
// even if it maps to a printable character.
while (leadingBytes-- > 0) {
WriteByte(data[offset++]);
}
if (revMode == ReverseMode.LineReverse) {
// Max per line is line length minus the two delimiters. We don't allow
// any hex quoting in reversed text, so this always works. (If somebody
// does try to reverse text with delimiters or unprintable chars, we'll
// blow out the line limit, but for a cross-assembler that should be purely
// cosmetic.)
int maxPerLine = mMaxOperandLen - 2;
int numBlockLines = (length + maxPerLine - 1) / maxPerLine;
for (int chunk = 0; chunk < numBlockLines; chunk++) {
int chunkOffset = startOffset + chunk * maxPerLine;
int endOffset = chunkOffset + maxPerLine;
if (endOffset > strEndOffset) {
endOffset = strEndOffset;
}
for (int off = endOffset - 1; off >= chunkOffset; off--) {
WriteChar(data[off]);
}
}
} else if (revMode == ReverseMode.FullReverse) {
for (; offset < strEndOffset; offset++) {
int posn = startOffset + (strEndOffset - offset) - 1;
WriteChar(data[posn]);
}
} else {
Debug.Assert(revMode == ReverseMode.Forward);
for (; offset < strEndOffset; offset++) {
byte val = data[offset];
if (IsDciString && offset == strEndOffset - 1) {
val ^= 0x80;
}
WriteChar(val);
}
}
Finish();
}
}
}