1
0
mirror of https://github.com/fadden/6502bench.git synced 2024-08-14 03:29:00 +00:00
6502bench/Asm65/StringOpFormatter.cs
Andy McFadden dae76d9b45 Rework string operand formatting
This generalizes the string pseudo-operand formatter, moving it into
the Asm65 library.  The assembly source generators have been updated
to use it.  This makes the individual generators simpler, and by
virtue of avoiding "test runs" should make them slightly faster.

This also introduces byte-to-character converters, though we're
currently still only supporting low/high ASCII.

Regression test output is unchanged.
2019-08-09 17:46:33 -07:00

270 lines
10 KiB
C#

/*
* Copyright 2019 faddenSoft
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Diagnostics;
namespace Asm65 {
/// <summary>
/// String pseudo-op formatter. Handles character encoding conversion and quoting of
/// delimiters and non-printable characters.
/// </summary>
public class StringOpFormatter {
public CharEncoding.Convert CharConv { get; set; }
private char Delimiter { get; set; }
private RawOutputStyle RawStyle { get; set; }
private int MaxOperandLen { get; set; }
// Output format for raw (non-printable) characters. Most assemblers use comma-separated
// hex values, some allow dense hex strings.
public enum RawOutputStyle { DenseHex, CommaSep };
// Outputs.
public bool HasEscapedText { get; private set; }
public List<string> Lines { get; private set; }
// Reference to array with 16 hex digits. (May be upper or lower case.)
private char[] mHexChars;
/// <summary>
/// Character collection buffer. The delimiters are written into the buffer
/// because they're mixed with bytes, particularly when we have to escape the
/// delimiter character. Strings might start or end with escaped delimiters,
/// so we don't add them until we have to.
/// </summary>
private char[] mBuffer;
/// <summary>
/// Next available character position.
/// </summary>
private int mIndex;
/// <summary>
/// State of the buffer, based on the last thing we added.
/// </summary>
private enum State {
Unknown = 0,
StartOfLine,
InQuote,
OutQuote,
Finished
}
private State mState;
/// <summary>
/// Constructor.
/// </summary>
/// <param name="formatter">Reference to text formatter.</param>
/// <param name="delimiter">String delimiter character.</param>
/// <param name="byteStyle">How to format raw byte data.</param>
/// <param name="maxOperandLen">Maximum line length.</param>
/// <param name="charConv">Character conversion delegate.</param>
public StringOpFormatter(Formatter formatter, char delimiter, RawOutputStyle byteStyle,
int maxOperandLen, CharEncoding.Convert charConv) {
Delimiter = delimiter;
RawStyle = byteStyle;
MaxOperandLen = maxOperandLen;
CharConv = charConv;
mBuffer = new char[MaxOperandLen];
mHexChars = formatter.HexDigits;
Lines = new List<string>();
Reset();
}
public void Reset() {
mState = State.StartOfLine;
mIndex = 0;
Lines.Clear();
}
/// <summary>
/// Write a character into the buffer. If the character matches the delimiter, or
/// isn't printable, the raw character value will be written as a byte instead.
/// </summary>
/// <param name="rawCh">Raw character value.</param>
public void WriteChar(byte rawCh) {
Debug.Assert(mState != State.Finished);
char ch = CharConv(rawCh);
if (ch == Delimiter || ch == CharEncoding.UNPRINTABLE_CHAR) {
// Must write it as a byte.
WriteByte(rawCh);
return;
}
// If we're at the start of a line, add delimiter, then new char.
// If we're inside quotes, just add the character. We must have space for
// two chars (new char, close quote).
// If we're outside quotes, add a comma and delimiter, then the character.
// We must have 4 chars remaining (comma, open quote, new char, close quote).
switch (mState) {
case State.StartOfLine:
mBuffer[mIndex++] = Delimiter;
break;
case State.InQuote:
if (mIndex + 2 > MaxOperandLen) {
Flush();
mBuffer[mIndex++] = Delimiter;
}
break;
case State.OutQuote:
if (mIndex + 4 > MaxOperandLen) {
Flush();
mBuffer[mIndex++] = Delimiter;
} else {
mBuffer[mIndex++] = ',';
mBuffer[mIndex++] = Delimiter;
}
break;
default:
Debug.Assert(false);
break;
}
mBuffer[mIndex++] = ch;
mState = State.InQuote;
}
/// <summary>
/// Write a hex value into the buffer.
/// </summary>
/// <param name="val">Value to add.</param>
public void WriteByte(byte val) {
Debug.Assert(mState != State.Finished);
HasEscapedText = true;
// If we're at the start of a line, just output the byte.
// If we're inside quotes, emit a delimiter, comma, and the byte. We must
// have space for four (DenseHex) or five (CommaSep) chars.
// If we're outside quotes, add the byte. We must have two (DenseHex) or
// four (CommaSep) chars remaining.
switch (mState) {
case State.StartOfLine:
break;
case State.InQuote:
int minWidth = (RawStyle == RawOutputStyle.CommaSep) ? 5 : 4;
if (mIndex + minWidth > MaxOperandLen) {
Flush();
} else {
mBuffer[mIndex++] = Delimiter;
mBuffer[mIndex++] = ',';
}
break;
case State.OutQuote:
minWidth = (RawStyle == RawOutputStyle.CommaSep) ? 4 : 2;
if (mIndex + minWidth > MaxOperandLen) {
Flush();
} else {
if (RawStyle == RawOutputStyle.CommaSep) {
mBuffer[mIndex++] = ',';
}
}
break;
default:
Debug.Assert(false);
break;
}
if (RawStyle == RawOutputStyle.CommaSep) {
mBuffer[mIndex++] = '$';
}
mBuffer[mIndex++] = mHexChars[val >> 4];
mBuffer[mIndex++] = mHexChars[val & 0x0f];
mState = State.OutQuote;
}
/// <summary>
/// Tells the object to flush any pending data to the output.
/// </summary>
public void Finish() {
Flush();
}
/// <summary>
/// Outputs the buffer of pending data. A closing delimiter will be added if needed.
/// </summary>
private void Flush() {
switch (mState) {
case State.StartOfLine:
// empty string; put out a pair of delimiters
mBuffer[mIndex++] = Delimiter;
mBuffer[mIndex++] = Delimiter;
break;
case State.InQuote:
// add delimiter and finish
mBuffer[mIndex++] = Delimiter;
break;
case State.OutQuote:
// just output it
break;
}
string newStr = new string(mBuffer, 0, mIndex);
Debug.Assert(newStr.Length <= MaxOperandLen);
Lines.Add(newStr);
mState = State.Finished;
mIndex = 0;
}
/// <summary>
/// Feeds the bytes into the StringGather.
/// </summary>
public void FeedBytes(byte[] data, int offset, int length, int leadingBytes,
bool reverse) {
int startOffset = offset;
int strEndOffset = offset + length;
// Write leading bytes. This is used for the 8- or 16-bit length (when no
// appropriate pseudo-op is available), because we want to output that as hex
// even if it maps to a printable character.
while (leadingBytes-- > 0) {
WriteByte(data[offset++]);
}
if (reverse) {
// Max per line is line length minus the two delimiters. We don't allow
// any hex quoting in reversed text, so this always works. (If somebody
// does try to reverse text with delimiters or unprintable chars, we'll
// blow out the line limit, but for a cross-assembler that should be purely
// cosmetic.)
int maxPerLine = MaxOperandLen - 2;
int numBlockLines = (length + maxPerLine - 1) / maxPerLine;
for (int chunk = 0; chunk < numBlockLines; chunk++) {
int chunkOffset = startOffset + chunk * maxPerLine;
int endOffset = chunkOffset + maxPerLine;
if (endOffset > strEndOffset) {
endOffset = strEndOffset;
}
for (int off = endOffset - 1; off >= chunkOffset; off--) {
WriteChar(data[off]);
}
}
} else {
for (; offset < strEndOffset; offset++) {
WriteChar(data[offset]);
}
}
Finish();
}
}
}