/*
* Copyright 2018 faddenSoft
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Reflection;
using System.Text;
using System.Web.Script.Serialization;
using Asm65;
using CommonUtil;
namespace SourceGen {
/// <summary>
/// Data pseudo-op formatter.  Long operands, notably strings and dense hex blocks, may
/// be broken across multiple lines.
///
/// Assembler output will use Opcode and Operand, emitting multiple lines of ASC, HEX,
/// etc.  The display list may treat it as a single item that is split across
/// multiple lines.
/// </summary>
public class PseudoOp {
// Maximum width, in characters, of one formatted operand line.  String output
// subtracts the delimiter widths from this budget; dense hex output uses two
// characters per byte.  Operands that need more room are split across lines.
private const int MAX_OPERAND_LEN = 64;
/// <summary>
/// One line of pseudo-op output: the opcode together with a single piece of the operand.
/// </summary>
public struct PseudoOut {
    /// <summary>
    /// Pseudo-opcode text.  Every entry in a generated list carries the same value.
    /// </summary>
    public string Opcode { get; set; }

    /// <summary>
    /// Formatted text for this entry's portion of the operand.
    /// </summary>
    public string Operand { get; set; }

    /// <summary>
    /// Copy constructor.
    /// </summary>
    /// <param name="src">Entry whose opcode and operand are duplicated.</param>
    public PseudoOut(PseudoOut src) {
        this.Operand = src.Operand;
        this.Opcode = src.Opcode;
    }
}
/// <summary>
/// Pseudo-op name collection.  Name strings may be null.
/// </summary>
/// <remarks>
/// Every public property is a string with a getter and setter; Merge() relies on
/// that when it walks the properties via reflection.
/// </remarks>
public class PseudoOpNames {
    public string EquDirective { get; set; }
    public string OrgDirective { get; set; }
    public string RegWidthDirective { get; set; }

    public string DefineData1 { get; set; }
    public string DefineData2 { get; set; }
    public string DefineData3 { get; set; }
    public string DefineData4 { get; set; }
    public string DefineBigData2 { get; set; }
    public string DefineBigData3 { get; set; }
    public string DefineBigData4 { get; set; }
    public string Fill { get; set; }
    public string Dense { get; set; }

    public string StrGeneric { get; set; }
    public string StrGenericHi { get; set; }
    public string StrReverse { get; set; }
    public string StrReverseHi { get; set; }
    public string StrLen8 { get; set; }
    public string StrLen8Hi { get; set; }
    public string StrLen16 { get; set; }
    public string StrLen16Hi { get; set; }
    public string StrNullTerm { get; set; }
    public string StrNullTermHi { get; set; }
    public string StrDci { get; set; }
    public string StrDciHi { get; set; }
    public string StrDciReverse { get; set; }
    public string StrDciReverseHi { get; set; }

    /// <summary>
    /// Returns the "define data" directive for a value of the given width.
    /// </summary>
    /// <param name="width">Width in bytes, 1-4.</param>
    /// <returns>Directive name (possibly null); a placeholder string if the width
    ///   is out of range.</returns>
    public string GetDefineData(int width) {
        switch (width) {
            case 1: return DefineData1;
            case 2: return DefineData2;
            case 3: return DefineData3;
            case 4: return DefineData4;
            default: Debug.Assert(false); return ".?!!";
        }
    }

    /// <summary>
    /// Returns the "define big data" directive for a value of the given width.
    /// A one-byte value has no byte order, so width 1 maps to DefineData1.
    /// </summary>
    /// <param name="width">Width in bytes, 1-4.</param>
    /// <returns>Directive name (possibly null); a placeholder string if the width
    ///   is out of range.</returns>
    public string GetDefineBigData(int width) {
        switch (width) {
            case 1: return DefineData1;
            case 2: return DefineBigData2;
            case 3: return DefineBigData3;
            case 4: return DefineBigData4;
            default: Debug.Assert(false); return ".!!?";
        }
    }

    /// <summary>
    /// Creates an independent copy of this object.
    /// </summary>
    /// <returns>New instance holding the same names.</returns>
    public PseudoOpNames GetCopy() {
        // Do it the lazy way: round-trip through the serializer.
        return Deserialize(Serialize());
    }

    /// <summary>
    /// Merges the non-null, non-empty strings in "other" into this instance.
    /// </summary>
    /// <param name="other">Source of replacement names.  A null reference is
    ///   treated as "nothing to merge".</param>
    public void Merge(PseudoOpNames other) {
        if (other == null) {
            // Reflection would throw on a null target; there is simply nothing to do.
            return;
        }

        // Lots of fields, we don't do this often... use reflection.
        foreach (PropertyInfo pi in GetType().GetProperties()) {
            string str = (string)pi.GetValue(other);
            if (string.IsNullOrEmpty(str)) {
                continue;
            }
            pi.SetValue(this, str);
        }
    }

    /// <summary>
    /// Serializes the name table to a JSON string.
    /// </summary>
    /// <returns>JSON text.</returns>
    public string Serialize() {
        // This results in a JSON-encoded string being stored in a JSON-encoded file,
        // which means a lot of double-quote escaping.  We could do something here
        // that stored more nicely but it doesn't seem worth the effort.
        JavaScriptSerializer ser = new JavaScriptSerializer();
        return ser.Serialize(this);
    }

    /// <summary>
    /// Reconstructs a name table from the output of Serialize().
    /// </summary>
    /// <param name="cereal">Serialized form.</param>
    /// <returns>The deserialized object, or a new empty instance if the input could
    ///   not be parsed or did not describe an object.  Never null.</returns>
    public static PseudoOpNames Deserialize(string cereal) {
        JavaScriptSerializer ser = new JavaScriptSerializer();
        try {
            // The target type must be given explicitly; it cannot be inferred from
            // the string argument.
            PseudoOpNames names = ser.Deserialize<PseudoOpNames>(cereal);
            if (names != null) {
                return names;
            }
            // Input such as the JSON literal "null" parses without error but yields
            // no object.  Fall through to the same default used for parse failures.
            Debug.WriteLine("PseudoOpNames deserialization yielded no object");
        } catch (Exception ex) {
            Debug.WriteLine("PseudoOpNames deserialization failed: " + ex.Message);
        }
        return new PseudoOpNames();
    }
}
/// <summary>
/// Some reasonable defaults for on-screen display.  The object is mutable and this
/// instance is shared, so code that wants to change any of the names should work on
/// a copy (see PseudoOpNames.GetCopy()) instead of modifying it in place.
/// </summary>
public static readonly PseudoOpNames sDefaultPseudoOpNames = new PseudoOpNames() {
EquDirective = ".eq",
OrgDirective = ".org",
RegWidthDirective = ".rwid",
DefineData1 = ".dd1",
DefineData2 = ".dd2",
DefineData3 = ".dd3",
DefineData4 = ".dd4",
DefineBigData2 = ".dbd2",
DefineBigData3 = ".dbd3",
DefineBigData4 = ".dbd4",
Fill = ".fill",
Dense = ".bulk",
StrGeneric = ".str",
StrGenericHi = ".strh",
StrReverse = ".rstr",
StrReverseHi = ".rstrh",
StrLen8 = ".l1str",
StrLen8Hi = ".l1strh",
StrLen16 = ".l2str",
StrLen16Hi = ".l2strh",
StrNullTerm = ".zstr",
StrNullTermHi = ".zstrh",
StrDci = ".dstr",
StrDciHi = ".dstrh",
StrDciReverse = ".rdstr",
StrDciReverseHi = ".rdstrh",
};
/// <summary>
/// Determines how many output lines the formatted form of a data item occupies.
/// </summary>
/// <param name="formatter">Format definition.  Not consulted by the current
///   implementation.</param>
/// <param name="dfd">Data format descriptor.</param>
/// <returns>Line count; always at least 1 for dense and string data with a
///   positive length.</returns>
public static int ComputeRequiredLineCount(Formatter formatter, FormatDescriptor dfd) {
    FormatDescriptor.Type formatType = dfd.FormatType;

    if (formatType == FormatDescriptor.Type.Dense) {
        // Two hex digits per input byte, no delimiters.  Round up to whole lines.
        int hexChars = dfd.Length * 2;
        return (hexChars + MAX_OPERAND_LEN - 1) / MAX_OPERAND_LEN;
    }

    if (formatType == FormatDescriptor.Type.String) {
        // Two characters of each line are reserved for the start/end delimiters.
        // The on-screen delimiters are non-ASCII, so nothing inside needs escaping.
        int charsPerLine = MAX_OPERAND_LEN - 2;

        // Framing bytes (leading length, trailing null) are not displayed.
        int charCount = dfd.Length;
        switch (dfd.FormatSubType) {
            case FormatDescriptor.SubType.L16String:
                charCount -= 2;
                break;
            case FormatDescriptor.SubType.L8String:
            case FormatDescriptor.SubType.CString:
                charCount -= 1;
                break;
            case FormatDescriptor.SubType.None:
            case FormatDescriptor.SubType.Dci:
            case FormatDescriptor.SubType.Reverse:
            case FormatDescriptor.SubType.DciReverse:
                break;
            default:
                Debug.Assert(false);
                break;
        }

        int lineCount = (charCount + charsPerLine - 1) / charsPerLine;
        // An empty string still occupies one line of output.
        return (lineCount == 0) ? 1 : lineCount;
    }

    switch (formatType) {
        case FormatDescriptor.Type.Default:
        case FormatDescriptor.Type.NumericLE:
        case FormatDescriptor.Type.NumericBE:
        case FormatDescriptor.Type.Fill:
            return 1;
        default:
            Debug.Assert(false);
            return 1;
    }
}
/// <summary>
/// Generates a pseudo-op statement for the specified data operation.
///
/// For most operations, only one output line will be generated.  For larger items,
/// like long strings and dense hex blocks, the value may be split into multiple
/// lines.  The sub-index indicates which line should be formatted.
/// </summary>
/// <param name="formatter">Format definition.</param>
/// <param name="opNames">Table of pseudo-op names.</param>
/// <param name="symbolTable">Project symbol table.</param>
/// <param name="labelMap">Symbol label map.  May be null.</param>
/// <param name="dfd">Data format descriptor.</param>
/// <param name="data">File data array.</param>
/// <param name="offset">Start offset.</param>
/// <param name="subIndex">For multi-line items, which line.</param>
/// <returns>Opcode and operand text for the requested line.</returns>
public static PseudoOut FormatDataOp(Formatter formatter, PseudoOpNames opNames,
        SymbolTable symbolTable, Dictionary<string, string> labelMap,
        FormatDescriptor dfd, byte[] data, int offset, int subIndex) {
    int length = dfd.Length;
    Debug.Assert(length > 0);
    Debug.Assert(subIndex >= 0);

    // All outputs for a given offset show the same offset and length, even for
    // multi-line items.
    PseudoOut po = new PseudoOut();

    // Shared by the numeric cases; declared here rather than inside one switch
    // section so that no case depends on a declaration in another.
    int operand;

    switch (dfd.FormatType) {
        case FormatDescriptor.Type.Default:
            if (length != 1) {
                // This shouldn't happen.
                Debug.Assert(false);
                length = 1;
            }
            po.Opcode = opNames.GetDefineData(length);
            operand = RawData.GetWord(data, offset, length, false);
            po.Operand = formatter.FormatHexValue(operand, length * 2);
            break;
        case FormatDescriptor.Type.NumericLE:
            po.Opcode = opNames.GetDefineData(length);
            operand = RawData.GetWord(data, offset, length, false);
            po.Operand = FormatNumericOperand(formatter, symbolTable, labelMap, dfd,
                operand, length, false);
            break;
        case FormatDescriptor.Type.NumericBE:
            po.Opcode = opNames.GetDefineBigData(length);
            operand = RawData.GetWord(data, offset, length, true);
            po.Operand = FormatNumericOperand(formatter, symbolTable, labelMap, dfd,
                operand, length, false);
            break;
        case FormatDescriptor.Type.Fill:
            po.Opcode = opNames.Fill;
            po.Operand = length + "," + formatter.FormatHexValue(data[offset], 2);
            break;
        case FormatDescriptor.Type.Dense: {
                // Two hex digits per byte, so each line holds half as many bytes
                // as it has character columns.  Format only the requested slice.
                int maxPerLine = MAX_OPERAND_LEN / 2;
                offset += subIndex * maxPerLine;
                length -= subIndex * maxPerLine;
                if (length > maxPerLine) {
                    length = maxPerLine;
                }
                po.Opcode = opNames.Dense;
                po.Operand = formatter.FormatDenseHex(data, offset, length);
            }
            break;
        case FormatDescriptor.Type.String:
            // It's hard to do strings in single-line pieces because of prefix lengths,
            // terminating nulls, DCI polarity, and reverse-order strings.  We
            // really just want to convert the whole thing to a run of chars
            // and then pull out a chunk.  As an optimization we can handle
            // generic strings (subtype=None) more efficiently, which should solve
            // the problem of massive strings created by auto-analysis.
            if (dfd.FormatSubType == FormatDescriptor.SubType.None) {
                // Leave room for the two delimiter characters.
                int maxPerLine = MAX_OPERAND_LEN - 2;
                offset += subIndex * maxPerLine;
                length -= subIndex * maxPerLine;
                if (length > maxPerLine) {
                    length = maxPerLine;
                }
                // The "show hex zeroes" result is never set for generic strings.
                char[] ltext = BytesToChars(formatter, opNames, dfd.FormatSubType, data,
                    offset, length, out string lpopcode, out _);
                po.Opcode = lpopcode;
                po.Operand = "\u201c" + new string(ltext) + "\u201d";
            } else {
                char[] text = BytesToChars(formatter, opNames, dfd.FormatSubType, data,
                    offset, length, out string popcode, out int showHexZeroes);

                if (showHexZeroes == 1) {
                    // Empty string: show the lone length/terminator byte as hex.
                    po.Opcode = opNames.DefineData1;
                    po.Operand = formatter.FormatHexValue(0, 2);
                } else if (showHexZeroes == 2) {
                    // Empty string: show the two-byte length as hex.
                    po.Opcode = opNames.DefineData2;
                    po.Operand = formatter.FormatHexValue(0, 4);
                } else {
                    Debug.Assert(showHexZeroes == 0);
                    po.Opcode = popcode;
                    List<PseudoOut> outList = new List<PseudoOut>();
                    GenerateTextLines(text, "\u201c", "\u201d", po, outList);
                    Debug.Assert(subIndex < outList.Count);
                    po = outList[subIndex];
                }
            }
            break;
        default:
            Debug.Assert(false);
            po.Opcode = ".???";
            po.Operand = "$" + data[offset].ToString("x2");
            break;
    }

    return po;
}
/// <summary>
/// Converts a run of bytes that represents a string into an array of characters,
/// with the high bit of each byte cleared.  Framing data (a leading length byte or
/// word, a trailing null) is excluded from the result.
/// </summary>
/// <param name="formatter">Formatter object.  Not consulted by the current
///   implementation.</param>
/// <param name="opNames">Table of pseudo-op names, used to select the opcode.</param>
/// <param name="subType">String sub-type.</param>
/// <param name="data">File data.</param>
/// <param name="offset">Offset, within data, of start of string.</param>
/// <param name="length">Number of bytes to convert, including framing.</param>
/// <param name="popcode">Receives the pseudo-opcode string; the "Hi" variant is
///   chosen when the tested character has its high bit set.</param>
/// <param name="showHexZeroes">Receives 0 normally.  Receives 1 or 2 when the string
///   is empty, meaning the caller should show that many zero bytes (the length or
///   terminator) as hex instead of an empty string.</param>
/// <returns>Array of characters with string data.</returns>
private static char[] BytesToChars(Formatter formatter, PseudoOpNames opNames,
        FormatDescriptor.SubType subType, byte[] data, int offset, int length,
        out string popcode, out int showHexZeroes) {
    Debug.Assert(length > 0);

    // See also GenMerlin32.OutputString().

    // True if the byte at the given index has its high bit set.
    bool IsHigh(int index) {
        return (data[index] & 0x80) != 0;
    }

    int textStart = offset;         // first byte of displayable text
    int textCount = length;         // number of displayable bytes
    bool backwards = false;         // emit characters last-to-first
    showHexZeroes = 0;

    switch (subType) {
        case FormatDescriptor.SubType.None:
            // Plain run of characters; polarity is taken from the first byte.
            popcode = IsHigh(offset) ? opNames.StrGenericHi : opNames.StrGeneric;
            break;
        case FormatDescriptor.SubType.Dci:
            // Polarity is taken from the first byte.
            popcode = IsHigh(offset) ? opNames.StrDciHi : opNames.StrDci;
            break;
        case FormatDescriptor.SubType.Reverse:
            // Stored back-to-front, so the last byte holds the first character.
            popcode = IsHigh(offset + textCount - 1) ?
                opNames.StrReverseHi : opNames.StrReverse;
            backwards = true;
            break;
        case FormatDescriptor.SubType.DciReverse:
            // Stored back-to-front, so the last byte holds the first character.
            popcode = IsHigh(offset + textCount - 1) ?
                opNames.StrDciReverseHi : opNames.StrDciReverse;
            backwards = true;
            break;
        case FormatDescriptor.SubType.CString:
            // Trailing null is not displayed.  For an empty string the first byte
            // is the null itself, which selects the low-ASCII opcode.
            popcode = IsHigh(offset) ? opNames.StrNullTermHi : opNames.StrNullTerm;
            textCount -= 1;
            if (textCount == 0) {
                showHexZeroes = 1;
            }
            break;
        case FormatDescriptor.SubType.L8String: {
                // Leading length byte is not displayed.
                textStart += 1;
                textCount -= 1;
                bool high = false;
                if (textCount == 0) {
                    showHexZeroes = 1;
                } else {
                    high = IsHigh(textStart);
                }
                popcode = high ? opNames.StrLen8Hi : opNames.StrLen8;
            }
            break;
        case FormatDescriptor.SubType.L16String: {
                // Leading length word is not displayed.
                Debug.Assert(textCount > 1);
                textStart += 2;
                textCount -= 2;
                bool high = false;
                if (textCount == 0) {
                    showHexZeroes = 2;
                } else {
                    high = IsHigh(textStart);
                }
                popcode = high ? opNames.StrLen16Hi : opNames.StrLen16;
            }
            break;
        default:
            Debug.Assert(false);
            popcode = ".!!!";
            break;
    }

    // Copy the text, clearing the high bit, walking the source in whichever
    // direction the sub-type calls for.
    char[] chars = new char[textCount];
    for (int dst = 0; dst < textCount; dst++) {
        int src = backwards ? textStart + (textCount - dst - 1) : dst + textStart;
        chars[dst] = (char)(data[src] & 0x7f);
    }
    return chars;
}
/// <summary>
/// Generate multiple operand lines from a text line, adding optional delimiters.
/// </summary>
/// <param name="text">Buffer of characters to output.  Must be ASCII.</param>
/// <param name="startDelim">Delimiter character(s), or the empty string.</param>
/// <param name="endDelim">Delimiter character(s), or the empty string.</param>
/// <param name="template">PseudoOut with the opcode set.  Each generated PseudoOut
///   is a copy of this with the operand replaced.</param>
/// <param name="outList">List that receives the generated items.  At least one
///   item is always added.</param>
/// <exception cref="ArgumentException">Text is non-empty and the delimiters leave no
///   room for any of it on a line.</exception>
private static void GenerateTextLines(char[] text, string startDelim, string endDelim,
        PseudoOut template, List<PseudoOut> outList) {
    // Could get fancy and break long strings at word boundaries.

    if (text.Length == 0) {
        // Empty string: emit just the delimiters.
        PseudoOut po = new PseudoOut(template);
        po.Operand = startDelim + endDelim;
        outList.Add(po);
        return;
    }

    int textPerLine = MAX_OPERAND_LEN - (startDelim.Length + endDelim.Length);
    if (textPerLine <= 0) {
        // With no room for text the loop below could never make progress.
        throw new ArgumentException("Delimiters leave no room for text on a line");
    }

    StringBuilder sb = new StringBuilder(MAX_OPERAND_LEN);
    for (int textOffset = 0; textOffset < text.Length; textOffset += textPerLine) {
        int len = Math.Min(textPerLine, text.Length - textOffset);

        sb.Clear();
        sb.Append(startDelim);
        sb.Append(text, textOffset, len);
        sb.Append(endDelim);

        PseudoOut po = new PseudoOut(template);
        po.Operand = sb.ToString();
        outList.Add(po);
    }
}
/// <summary>
/// Format a numeric operand value according to the specified sub-format.
/// </summary>
/// <param name="formatter">Text formatter.</param>
/// <param name="symbolTable">Full table of project symbols.</param>
/// <param name="labelMap">Symbol label remap, for local label conversion.  May be
///   null.</param>
/// <param name="dfd">Operand format descriptor.</param>
/// <param name="operandValue">Operand's value.  For most things this comes directly
///   out of the code, for relative branches it's a 24-bit absolute address.</param>
/// <param name="operandLen">Length of operand, in bytes.  For an instruction, this
///   does not include the opcode byte.  For a relative branch, this will be 2.</param>
/// <param name="isPcRel">Set to true if the actual operand is a PC-relative value.
///   These get slightly different treatment.</param>
/// <returns>Formatted operand text.  A symbol reference whose label is not in the
///   symbol table is formatted as plain hex; an unrecognized sub-type or expression
///   mode yields "???".</returns>
public static string FormatNumericOperand(Formatter formatter, SymbolTable symbolTable,
        Dictionary<string, string> labelMap, FormatDescriptor dfd,
        int operandValue, int operandLen, bool isPcRel) {
    Debug.Assert(operandLen > 0);
    int hexMinLen = operandLen * 2;

    switch (dfd.FormatSubType) {
        case FormatDescriptor.SubType.None:
        case FormatDescriptor.SubType.Hex:
        case FormatDescriptor.SubType.Address:
            return formatter.FormatHexValue(operandValue, hexMinLen);
        case FormatDescriptor.SubType.Decimal:
            return formatter.FormatDecimalValue(operandValue);
        case FormatDescriptor.SubType.Binary:
            // Four binary digits per hex digit.
            return formatter.FormatBinaryValue(operandValue, hexMinLen * 4);
        case FormatDescriptor.SubType.Ascii:
            return formatter.FormatAsciiOrHex(operandValue);
        case FormatDescriptor.SubType.Symbol:
            if (symbolTable.TryGetValue(dfd.SymbolRef.Label, out Symbol sym)) {
                StringBuilder sb = new StringBuilder();

                switch (formatter.ExpressionMode) {
                    case Formatter.FormatConfig.ExpressionMode.Simple:
                        FormatNumericSymbolSimple(formatter, sym, labelMap,
                            dfd, operandValue, operandLen, isPcRel, sb);
                        break;
                    case Formatter.FormatConfig.ExpressionMode.Merlin:
                        FormatNumericSymbolMerlin(formatter, sym, labelMap,
                            dfd, operandValue, operandLen, isPcRel, sb);
                        break;
                    default:
                        Debug.Assert(false, "Unknown expression mode " +
                            formatter.ExpressionMode);
                        return "???";
                }

                return sb.ToString();
            } else {
                // Referenced symbol doesn't exist; fall back to the raw value.
                return formatter.FormatHexValue(operandValue, hexMinLen);
            }
        default:
            Debug.Assert(false);
            return "???";
    }
}
/// <summary>
/// Format the symbol and adjustment using common expression syntax.
/// </summary>
/// <param name="formatter">Text formatter.</param>
/// <param name="sym">Symbol being referenced.</param>
/// <param name="labelMap">Symbol label remap.  May be null.</param>
/// <param name="dfd">Operand format descriptor; supplies the value part (low, high,
///   bank) being referenced.</param>
/// <param name="operandValue">Operand's value.</param>
/// <param name="operandLen">Length of operand, in bytes (1-4).</param>
/// <param name="isPcRel">True if the operand is PC-relative.</param>
/// <param name="sb">Builder that the expression is appended to.</param>
private static void FormatNumericSymbolSimple(Formatter formatter, Symbol sym,
        Dictionary<string, string> labelMap, FormatDescriptor dfd,
        int operandValue, int operandLen, bool isPcRel, StringBuilder sb) {
    // We could have some simple code that generated correct output, shifting and
    // masking every time, but that's ugly and annoying.  For single-byte ops we can
    // just use the byte-select operators, for wider ops we get only as fancy as we
    // need to be.

    int adjustment, symbolValue;

    string symLabel = sym.Label;
    if (labelMap != null && labelMap.TryGetValue(symLabel, out string newLabel)) {
        symLabel = newLabel;
    }

    if (operandLen == 1) {
        // Use the byte-selection operator to get the right piece.
        string selOp;
        if (dfd.SymbolRef.ValuePart == WeakSymbolRef.Part.Bank) {
            symbolValue = (sym.Value >> 16) & 0xff;
            selOp = "^";
        } else if (dfd.SymbolRef.ValuePart == WeakSymbolRef.Part.High) {
            symbolValue = (sym.Value >> 8) & 0xff;
            selOp = ">";
        } else {
            symbolValue = sym.Value & 0xff;
            if (symbolValue == sym.Value) {
                // Value already fits in a byte; no operator needed.
                selOp = string.Empty;
            } else {
                selOp = "<";
            }
        }
        sb.Append(selOp);
        sb.Append(symLabel);

        operandValue &= 0xff;
    } else if (operandLen <= 4) {
        // Operands and values should be 8/16/24 bit unsigned quantities.  32-bit
        // support is really there so you can have a 24-bit pointer in a 32-bit hole.
        // Might need to adjust this if 32-bit signed quantities become interesting.
        uint mask = 0xffffffff >> ((4 - operandLen) * 8);
        string shOp;
        if (dfd.SymbolRef.ValuePart == WeakSymbolRef.Part.Bank) {
            symbolValue = (sym.Value >> 16);
            shOp = " >> 16";
        } else if (dfd.SymbolRef.ValuePart == WeakSymbolRef.Part.High) {
            symbolValue = (sym.Value >> 8);
            shOp = " >> 8";
        } else {
            symbolValue = sym.Value;
            shOp = "";
        }

        if (isPcRel) {
            // PC-relative operands are funny, because an 8- or 16-bit value is always
            // expanded to 24 bits.  We output a 16-bit value that the assembler will
            // convert back to 8-bit or 16-bit.  In any event, the bank byte is never
            // relevant to our computations.
            operandValue &= 0xffff;
            symbolValue &= 0xffff;
        }

        // Measure from where this expression starts, so the separator check below
        // stays correct even if the caller's builder already holds text.
        int exprStart = sb.Length;

        sb.Append(symLabel);
        sb.Append(shOp);
        if (symbolValue > mask) {
            // Post-shift value won't fit in an operand-size box.
            symbolValue = (int) (symbolValue & mask);
            sb.Append(" & ");
            sb.Append(formatter.FormatHexValue((int)mask, 2));
        }

        // If a shift or mask was appended, separate it from the adjustment.
        if (sb.Length - exprStart != symLabel.Length) {
            sb.Append(' ');
        }

        operandValue = (int)(operandValue & mask);
    } else {
        Debug.Assert(false, "bad numeric len");
        sb.Append("?????");
        symbolValue = 0;
    }

    adjustment = operandValue - symbolValue;

    sb.Append(formatter.FormatAdjustment(adjustment));
}
/// <summary>
/// Format the symbol and adjustment using Merlin expression syntax.
/// </summary>
/// <param name="formatter">Text formatter.</param>
/// <param name="sym">Symbol being referenced.</param>
/// <param name="labelMap">Symbol label remap.  May be null.</param>
/// <param name="dfd">Operand format descriptor; supplies the value part (low, high,
///   bank) being referenced.</param>
/// <param name="operandValue">Operand's value.</param>
/// <param name="operandLen">Length of operand, in bytes.</param>
/// <param name="isPcRel">True if the operand is PC-relative.  Not consulted by the
///   current implementation.</param>
/// <param name="sb">Builder that the expression is appended to.</param>
private static void FormatNumericSymbolMerlin(Formatter formatter, Symbol sym,
        Dictionary<string, string> labelMap, FormatDescriptor dfd,
        int operandValue, int operandLen, bool isPcRel, StringBuilder sb) {
    // The part-selection operators differ from "simple" in two ways:
    //  (1) They always happen last.  If FOO=$10f0, "#>FOO+$18" == $11.  (Strangely,
    //      all other operators are evaluated from left to right, with no concept
    //      of operator precedence.)
    //  (2) They select words, not bytes.  If FOO=$123456, "#>FOO" is $1234.  This is
    //      best thought of as a shift operator, rather than byte-selection.  For
    //      8-bit code this doesn't matter.
    //
    // This behavior leads to simpler expressions for simple symbol adjustments.

    string symLabel = sym.Label;
    if (labelMap != null && labelMap.TryGetValue(symLabel, out string newLabel)) {
        symLabel = newLabel;
    }

    int adjustment;

    // If we add or subtract an adjustment, it will be done on the full value, which
    // is then shifted to the appropriate part.  So we need to left-shift the operand
    // value to match.  We fill in the low bytes with the contents of the symbol, so
    // that the adjustment doesn't include unnecessary values.  (For example, let
    // FOO=$10f0, with operand "#>FOO" ($10).  We shift the operand to get $1000, then
    // OR in the low byte to get $10f0, so that when we subtract we get adjustment==0.)
    int adjOperand, keepLen;
    if (dfd.SymbolRef.ValuePart == WeakSymbolRef.Part.Bank) {
        adjOperand = (operandValue << 16) | (sym.Value & 0xffff);
        keepLen = 3;
    } else if (dfd.SymbolRef.ValuePart == WeakSymbolRef.Part.High) {
        adjOperand = (operandValue << 8) | (sym.Value & 0xff);
        keepLen = 2;
    } else {
        adjOperand = operandValue;
        keepLen = 1;
    }

    keepLen = Math.Max(keepLen, operandLen);
    adjustment = adjOperand - sym.Value;
    if (keepLen == 1) {
        adjustment %= 256;
        // Adjust for aesthetics.  The assembler implicitly applies a modulo operation,
        // so we can use the value closest to zero.
        if (adjustment > 127) {
            adjustment = -(256 - adjustment) /*% 256*/;
        } else if (adjustment < -128) {
            adjustment = (256 + adjustment) /*% 256*/;
        }
    } else if (keepLen == 2) {
        // Same idea as above, for 16-bit values.
        adjustment %= 65536;
        if (adjustment > 32767) {
            adjustment = -(65536 - adjustment) /*% 65536*/;
        } else if (adjustment < -32768) {
            adjustment = (65536 + adjustment) /*% 65536*/;
        }
    }

    // Use the label from sym, not dfd's weak ref; might be different if label
    // comparisons are case-insensitive.
    switch (dfd.SymbolRef.ValuePart) {
        case WeakSymbolRef.Part.Unknown:
        case WeakSymbolRef.Part.Low:
            // For Merlin, "<" is effectively a no-op.  We can put it in for
            // aesthetics when grabbing the low byte of a 16-bit value.
            if ((operandLen == 1) && sym.Value > 0xff) {
                sb.Append('<');
            }
            sb.Append(symLabel);
            break;
        case WeakSymbolRef.Part.High:
            sb.Append('>');
            sb.Append(symLabel);
            break;
        case WeakSymbolRef.Part.Bank:
            sb.Append('^');
            sb.Append(symLabel);
            break;
        default:
            Debug.Assert(false, "bad part");
            sb.Append("???");
            break;
    }

    sb.Append(formatter.FormatAdjustment(adjustment));
}
}
}