From 8c053c29f290bd4edc8e1cb26236e46440f47740 Mon Sep 17 00:00:00 2001 From: Andy McFadden Date: Sat, 31 Jul 2021 14:42:36 -0700 Subject: [PATCH] Update ACME generator for v0.97 Two things changed: (1) string literals can now hold backslash escapes like "\n"; (2) MVN/MVP operands can now be prefixed with '#'. The former was a breaking change because any string with "\" must be changed to "\\". This is now handled by the string operand formatter. Also, improved test harness output. Show the assembler versions at the end, and include assembler failure messages in the collected output. --- Asm65/StringOpFormatter.cs | 29 +++++-- SourceGen/AsmGen/AsmAcme.cs | 22 ++++- SourceGen/AsmGen/AsmCc65.cs | 3 +- SourceGen/AsmGen/AsmMerlin32.cs | 2 +- SourceGen/AsmGen/AsmTass64.cs | 3 +- SourceGen/PseudoOp.cs | 2 +- SourceGen/RuntimeData/Help/codegen.html | 86 ++++++++++--------- SourceGen/RuntimeData/Help/index.html | 18 ++-- .../Expected/10002-allops-value-65816_acme.S | 4 +- .../Expected/10012-allops-zero-65816_acme.S | 4 +- .../SGTestData/Expected/20102-label-dp_acme.S | 4 +- .../Expected/20120-char-encoding-a_acme.S | 4 +- .../Expected/20130-char-encoding-p_acme.S | 2 +- .../Expected/20140-char-encoding-s_acme.S | 2 +- SourceGen/Tests/GenTest.cs | 14 +++ 15 files changed, 124 insertions(+), 75 deletions(-) diff --git a/Asm65/StringOpFormatter.cs b/Asm65/StringOpFormatter.cs index 36321c7..1a98252 100644 --- a/Asm65/StringOpFormatter.cs +++ b/Asm65/StringOpFormatter.cs @@ -43,6 +43,7 @@ namespace Asm65 { private Formatter.DelimiterDef mDelimiterDef; private RawOutputStyle mRawStyle; + private bool mBackslashEscapes; private int mMaxOperandLen; // Reference to array with 16 hex digits. (May be upper or lower case.) @@ -80,15 +81,18 @@ namespace Asm65 { /// String delimiter values. /// How to format raw byte data. /// Character conversion delegate. + /// True if "\" must be escaped with "\\". 
public StringOpFormatter(Formatter formatter, Formatter.DelimiterDef delimiterDef, - RawOutputStyle byteStyle, CharEncoding.Convert charConv) { - mRawStyle = byteStyle; - mMaxOperandLen = formatter.OperandWrapLen; - CharConv = charConv; - + RawOutputStyle byteStyle, CharEncoding.Convert charConv, + bool backslashEscapes) { mDelimiterDef = delimiterDef; - mBuffer = new char[mMaxOperandLen]; + mRawStyle = byteStyle; + CharConv = charConv; + mBackslashEscapes = backslashEscapes; + + mMaxOperandLen = formatter.OperandWrapLen; mHexChars = formatter.HexDigits; + mBuffer = new char[mMaxOperandLen]; Lines = new List(); // suffix not used, so we don't expect it to be set to something @@ -113,7 +117,7 @@ namespace Asm65 { /// isn't printable, the raw character value will be written as a byte instead. /// /// Raw character value. - public void WriteChar(byte rawCh) { + private void WriteChar(byte rawCh) { Debug.Assert(mState != State.Finished); char ch = CharConv(rawCh); @@ -160,7 +164,7 @@ namespace Asm65 { /// Write a hex value into the buffer. /// /// Value to add. 
- public void WriteByte(byte val) { + private void WriteByte(byte val) { Debug.Assert(mState != State.Finished); HasEscapedText = true; @@ -271,17 +275,26 @@ namespace Asm65 { } for (int off = endOffset - 1; off >= chunkOffset; off--) { WriteChar(data[off]); + if (data[off] == '\\' && mBackslashEscapes) { + WriteChar(data[off]); + } } } } else if (revMode == ReverseMode.FullReverse) { for (; offset < strEndOffset; offset++) { int posn = startOffset + (strEndOffset - offset) - 1; WriteChar(data[posn]); + if (data[posn] == '\\' && mBackslashEscapes) { + WriteChar(data[posn]); + } } } else { Debug.Assert(revMode == ReverseMode.Forward); for (; offset < strEndOffset; offset++) { WriteChar(data[offset]); + if (data[offset] == '\\' && mBackslashEscapes) { + WriteChar(data[offset]); + } } } diff --git a/SourceGen/AsmGen/AsmAcme.cs b/SourceGen/AsmGen/AsmAcme.cs index 6bfff8b..945fad1 100644 --- a/SourceGen/AsmGen/AsmAcme.cs +++ b/SourceGen/AsmGen/AsmAcme.cs @@ -103,6 +103,12 @@ namespace SourceGen.AsmGen { // Set if we're inside a "pseudopc" block, which will need to be closed. private bool mInPseudoPcBlock; + // v0.97 started treating '\' in constants as an escape character. + private bool mBackslashEscapes = true; + + // Interesting versions. + private static CommonUtil.Version V0_97 = new CommonUtil.Version(0, 97); + // Pseudo-op string constants. private static PseudoOp.PseudoOpNames sDataOpNames = @@ -150,6 +156,14 @@ namespace SourceGen.AsmGen { Project = project; + if (asmVersion != null) { + // Use the actual version. + mAsmVersion = asmVersion.Version; + } else { + // No assembler installed. Use v0.97. + mAsmVersion = V0_97; + } + // ACME isn't a single-pass assembler, but the code that determines label widths // only runs in the first pass and doesn't get corrected. 
So unlike cc65, which // generates correct zero-page acceses once the label's value is known, ACME @@ -168,7 +182,10 @@ namespace SourceGen.AsmGen { Quirks = new AssemblerQuirks(); Quirks.SinglePassAssembler = true; Quirks.SinglePassNoLabelCorrection = true; - Quirks.BlockMoveArgsNoHash = true; + if (mAsmVersion < V0_97) { + Quirks.BlockMoveArgsNoHash = true; + mBackslashEscapes = false; + } mWorkDirectory = workDirectory; mFileNameBase = fileNameBase; @@ -654,7 +671,8 @@ namespace SourceGen.AsmGen { } StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, - Formatter.DOUBLE_QUOTE_DELIM,StringOpFormatter.RawOutputStyle.CommaSep, charConv); + Formatter.DOUBLE_QUOTE_DELIM, StringOpFormatter.RawOutputStyle.CommaSep, charConv, + mBackslashEscapes); stropf.FeedBytes(data, offset, dfd.Length, leadingBytes, StringOpFormatter.ReverseMode.Forward); diff --git a/SourceGen/AsmGen/AsmCc65.cs b/SourceGen/AsmGen/AsmCc65.cs index a4d6e0a..46c905c 100644 --- a/SourceGen/AsmGen/AsmCc65.cs +++ b/SourceGen/AsmGen/AsmCc65.cs @@ -713,7 +713,8 @@ namespace SourceGen.AsmGen { } StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, - Formatter.DOUBLE_QUOTE_DELIM, StringOpFormatter.RawOutputStyle.CommaSep, charConv); + Formatter.DOUBLE_QUOTE_DELIM, StringOpFormatter.RawOutputStyle.CommaSep, charConv, + false); stropf.FeedBytes(data, offset, dfd.Length - trailingBytes, leadingBytes, StringOpFormatter.ReverseMode.Forward); diff --git a/SourceGen/AsmGen/AsmMerlin32.cs b/SourceGen/AsmGen/AsmMerlin32.cs index 7367835..967b540 100644 --- a/SourceGen/AsmGen/AsmMerlin32.cs +++ b/SourceGen/AsmGen/AsmMerlin32.cs @@ -616,7 +616,7 @@ namespace SourceGen.AsmGen { StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, new Formatter.DelimiterDef(delim), - StringOpFormatter.RawOutputStyle.DenseHex, charConv); + StringOpFormatter.RawOutputStyle.DenseHex, charConv, false); if (dfd.FormatType == FormatDescriptor.Type.StringDci) { // DCI is awkward because the character 
encoding flips on the last byte. Rather // than clutter up StringOpFormatter for this rare item, we just accept low/high diff --git a/SourceGen/AsmGen/AsmTass64.cs b/SourceGen/AsmGen/AsmTass64.cs index be54019..2c7ffc3 100644 --- a/SourceGen/AsmGen/AsmTass64.cs +++ b/SourceGen/AsmGen/AsmTass64.cs @@ -762,7 +762,8 @@ namespace SourceGen.AsmGen { } StringOpFormatter stropf = new StringOpFormatter(SourceFormatter, - Formatter.DOUBLE_QUOTE_DELIM,StringOpFormatter.RawOutputStyle.CommaSep, charConv); + Formatter.DOUBLE_QUOTE_DELIM,StringOpFormatter.RawOutputStyle.CommaSep, charConv, + false); if (dfd.FormatType == FormatDescriptor.Type.StringDci) { // DCI is awkward because the character encoding flips on the last byte. Rather // than clutter up StringOpFormatter for this rare item, we just accept low/high diff --git a/SourceGen/PseudoOp.cs b/SourceGen/PseudoOp.cs index 1101456..c47ef6f 100644 --- a/SourceGen/PseudoOp.cs +++ b/SourceGen/PseudoOp.cs @@ -545,7 +545,7 @@ namespace SourceGen { } StringOpFormatter stropf = new StringOpFormatter(formatter, delDef, - StringOpFormatter.RawOutputStyle.CommaSep, charConv); + StringOpFormatter.RawOutputStyle.CommaSep, charConv, false); stropf.FeedBytes(data, offset + hiddenLeadingBytes, dfd.Length - hiddenLeadingBytes - trailingBytes, 0, revMode); diff --git a/SourceGen/RuntimeData/Help/codegen.html b/SourceGen/RuntimeData/Help/codegen.html index 9517e9d..9bc7658 100644 --- a/SourceGen/RuntimeData/Help/codegen.html +++ b/SourceGen/RuntimeData/Help/codegen.html @@ -23,40 +23,6 @@ your web site, you can "export" the formatted code as text or HTML. This is explained in more detail below. -

Supported Assemblers

- -

SourceGen currently supports the following cross-assemblers:

- - -

Version-Specific Code Generation

- -

Code generation must be tailored to the specific version of the -assembler. This is most easily understood with an example.

-

If the code has a statement like MVN #$01,#$02, the -assembler is expected to output 54 02 01, with the arguments -reversed. cc65 v2.17 got it backward; the behavior was fixed in v2.18. The -bug means we can't generate the same MVN/MVP -instructions for both versions of the assembler.

-

Having version-dependent source code is a bad idea. If we generated -reversed operands (MVN #$02,#$01), we'd get the correct -output with v2.17, but the wrong output for v2.18. Unambiguous code can -be generated for all versions of the assembler by just outputting raw hex -bytes, but that's ugly and annoying, so we don't want to be stuck doing -that forever. We want to detect which version of the assembler is in -use, and output actual MVN/MVP instructions -when producing code for newer versions of the assembler.

-

When you configure a cross-assembler, SourceGen runs the executable with -version query args, and extracts the version information from the output -stream. This is used by the generator to ensure that the output will compile. -If no assembler is configured, SourceGen will produce code optimized -for the latest version of the assembler.

- -

Generating Source Code

Cross assemblers tend to generate additional files, either compiler @@ -149,7 +115,41 @@ SourceGen. However, SourceGen can generally work around assembler bugs, so any failure is an opportunity for improvement.

-

Assembler-Specific Bugs & Quirks

+

Supported Assemblers

+ +

SourceGen currently supports the following cross-assemblers:

+ + +

Version-Specific Code Generation

+ +

Code generation must be tailored to the specific version of the +assembler. This is most easily understood with an example.

+

If the code has a statement like MVN #$01,#$02, the +assembler is expected to output 54 02 01, with the arguments +reversed. cc65 v2.17 got it backward; the behavior was fixed in v2.18. The +bug means we can't generate the same MVN/MVP +instructions for both versions of the assembler.

+

Having version-dependent source code is a bad idea. If we generated +reversed operands (MVN #$02,#$01), we'd get the correct +output with v2.17, but the wrong output for v2.18. Unambiguous code can +be generated for all versions of the assembler by just outputting raw hex +bytes, but that's ugly and annoying, so we don't want to be stuck doing +that forever. We want to detect which version of the assembler is in +use, and output actual MVN/MVP instructions +when producing code for newer versions of the assembler.

+

When you configure a cross-assembler, SourceGen runs the executable with +version query args, and extracts the version information from the output +stream. This is used by the generator to ensure that the output will compile. +If no assembler is configured, SourceGen will produce code optimized +for the latest version of the assembler.

+ + +

Assembler-Specific Bugs & Quirks

This is a list of bugs and quirky behavior in cross-assemblers that SourceGen works around when generating code.

@@ -166,14 +166,15 @@ code, but also needs to know how to handle the corner cases.

64tass

-

Code is generated for 64tass v1.53.1515 or later. +

Tested versions: v1.53.1515, v1.54.1900 [web site]

Bugs:

Quirks:

@@ -217,7 +218,7 @@ code, but also needs to know how to handle the corner cases.

ACME

-

Code is generated for ACME v0.96.4 or later. +

Tested versions: v0.96.4, v0.97 [web site]

Bugs:

@@ -243,7 +244,8 @@ code, but also needs to know how to handle the corner cases.

ASR instead.
  • Does not allow the accumulator to be specified explicitly as an operand, e.g. you can't write LSR A.
  • -
  • Syntax for MVN/MVP doesn't allow '#' +
  • [Fixed in v0.97.] + Syntax for MVN/MVP doesn't allow '#' before 8-bit operands.
  • Officially, the preferred file extension for ACME source code is ".a", but this is already used on UNIX systems for static libraries (which @@ -255,7 +257,7 @@ code, but also needs to know how to handle the corner cases.

    cc65

    -

    Code is generated for cc65 v2.17 or v2.18. +

    Tested versions: v2.17, v2.18 [web site]

    Bugs:

    @@ -297,7 +299,7 @@ code, but also needs to know how to handle the corner cases.

    Merlin 32

    -

    Code is generated for Merlin 32 v1.0. +

Tested versions: v1.0 [web site] [bug tracker]

    diff --git a/SourceGen/RuntimeData/Help/index.html b/SourceGen/RuntimeData/Help/index.html index 6139e6c..9bdae43 100644 --- a/SourceGen/RuntimeData/Help/index.html +++ b/SourceGen/RuntimeData/Help/index.html @@ -99,10 +99,6 @@ and 65816 code. The official web site is
  • Code Generation & Assembly
  • diff --git a/SourceGen/SGTestData/Expected/10002-allops-value-65816_acme.S b/SourceGen/SGTestData/Expected/10002-allops-value-65816_acme.S index 6cbc901..f6bd4e8 100644 --- a/SourceGen/SGTestData/Expected/10002-allops-value-65816_acme.S +++ b/SourceGen/SGTestData/Expected/10002-allops-value-65816_acme.S @@ -87,7 +87,7 @@ L101F ora ($ff,x) L10AB eor ($ff,x) !byte $42,$ff eor $ff,S - mvp $fe,$ff + mvp #$fe,#$ff eor $ff lsr $ff eor [$ff] @@ -104,7 +104,7 @@ L10AB eor ($ff,x) @L10CE eor ($ff),y eor ($ff) eor ($ff,S),y - mvn $fe,$ff + mvn #$fe,#$ff eor $ff,x lsr $ff,x eor [$ff],y diff --git a/SourceGen/SGTestData/Expected/10012-allops-zero-65816_acme.S b/SourceGen/SGTestData/Expected/10012-allops-zero-65816_acme.S index 30b1ff6..69c45fe 100644 --- a/SourceGen/SGTestData/Expected/10012-allops-zero-65816_acme.S +++ b/SourceGen/SGTestData/Expected/10012-allops-zero-65816_acme.S @@ -87,7 +87,7 @@ L101F ora ($00,x) L10AB eor ($00,x) !byte $42,$00 eor $00,S - mvp $00,$00 + mvp #$00,#$00 eor $00 lsr $00 eor [$00] @@ -104,7 +104,7 @@ L10AB eor ($00,x) @L10CE eor ($00),y eor ($00) eor ($00,S),y - mvn $00,$00 + mvn #$00,#$00 eor $00,x lsr $00,x eor [$00],y diff --git a/SourceGen/SGTestData/Expected/20102-label-dp_acme.S b/SourceGen/SGTestData/Expected/20102-label-dp_acme.S index 64cb178..371b974 100644 --- a/SourceGen/SGTestData/Expected/20102-label-dp_acme.S +++ b/SourceGen/SGTestData/Expected/20102-label-dp_acme.S @@ -88,7 +88,7 @@ L101F ora (L0080,x) L10AB eor (L0080,x) !byte $42,$80 eor $80,S - mvp $84,$83 + mvp #$84,#$83 eor+1 L0080 lsr+1 L0080 eor [L0080] @@ -105,7 +105,7 @@ L10AB eor (L0080,x) @L10CE eor (L0080),y eor (L0080) eor ($80,S),y - mvn $84,$83 + mvn #$84,#$83 eor+1 L0080,x lsr+1 L0080,x eor [L0080],y diff --git a/SourceGen/SGTestData/Expected/20120-char-encoding-a_acme.S b/SourceGen/SGTestData/Expected/20120-char-encoding-a_acme.S index 9bd365a..5b72e42 100644 --- a/SourceGen/SGTestData/Expected/20120-char-encoding-a_acme.S +++ 
b/SourceGen/SGTestData/Expected/20120-char-encoding-a_acme.S @@ -45,7 +45,7 @@ !pet $93,"PETSCII with ",$96,"control",$05," codes",$0d !byte $83 !text " !",$22,"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVW" - !text "XYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~" + !text "XYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" !byte $83 !hex a0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebf !hex c0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedf @@ -134,7 +134,7 @@ !byte $1e !byte $1f !text " !",$22,"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVW" - !text "XYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~" + !text "XYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" !byte $7f !byte $80 !byte $81 diff --git a/SourceGen/SGTestData/Expected/20130-char-encoding-p_acme.S b/SourceGen/SGTestData/Expected/20130-char-encoding-p_acme.S index f0e3a4c..84f150b 100644 --- a/SourceGen/SGTestData/Expected/20130-char-encoding-p_acme.S +++ b/SourceGen/SGTestData/Expected/20130-char-encoding-p_acme.S @@ -45,7 +45,7 @@ !pet $93,"PETSCII with ",$96,"control",$05," codes",$0d !byte $83 !text " !",$22,"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVW" - !text "XYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~" + !text "XYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" !byte $83 !hex a0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebf !hex c0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedf diff --git a/SourceGen/SGTestData/Expected/20140-char-encoding-s_acme.S b/SourceGen/SGTestData/Expected/20140-char-encoding-s_acme.S index 134ffde..57e578d 100644 --- a/SourceGen/SGTestData/Expected/20140-char-encoding-s_acme.S +++ b/SourceGen/SGTestData/Expected/20140-char-encoding-s_acme.S @@ -45,7 +45,7 @@ !pet $93,"PETSCII with ",$96,"control",$05," codes",$0d !byte $83 !text " !",$22,"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVW" - !text "XYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~" + !text "XYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" !byte $83 !hex 
a0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebf !hex c0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedf diff --git a/SourceGen/Tests/GenTest.cs b/SourceGen/Tests/GenTest.cs index cf82869..5e89187 100644 --- a/SourceGen/Tests/GenTest.cs +++ b/SourceGen/Tests/GenTest.cs @@ -155,6 +155,8 @@ namespace SourceGen.Tests { ReportProgress(successCount + " of " + testCases.Count + " tests passed\r\n"); } + PrintAsmVersions(); + return mResults; } @@ -317,6 +319,7 @@ namespace SourceGen.Tests { ReportErrMsg("assembler returned code=" + asmResults.ExitCode); ReportFailure(); didFail = true; + results.AsmResults = asmResults; continue; } @@ -373,6 +376,17 @@ namespace SourceGen.Tests { return !didFail; } + private void PrintAsmVersions() { + ReportProgress("\nTested assemblers:"); + IEnumerator iter = AssemblerInfo.GetInfoEnumerator(); + while (iter.MoveNext()) { + AssemblerInfo info = iter.Current; + AssemblerVersion version = AssemblerVersionCache.GetVersion(info.AssemblerId); + ReportProgress(" " + info.Name + " v" + version.VersionStr); + } + ReportProgress("\n"); + } + /// /// Gets a copy of the AppSettings with a standard set of formatting options (e.g. lower /// case for everything).