From b74630dd5b61f765dea8c4154c4cfad6b3e246c7 Mon Sep 17 00:00:00 2001 From: Andy McFadden Date: Fri, 20 Sep 2019 14:05:17 -0700 Subject: [PATCH] Work around two assembler issues Most assemblers end local label scope when a global label is encountered. cc65 takes this one step further by ending local label scope when constants or variables are defined. So, if we have a variable table with a nonzero number of entries, we want to create a fake global label at that point to end the scope. Merlin 32 won't let you write " LDA #',' ". For some reason the comma causes an error. IGenerator now has a "tweak operand format" interface that lets us fix that. --- SourceGen/AsmGen/AsmAcme.cs | 6 ++++ SourceGen/AsmGen/AsmCc65.cs | 7 ++++ SourceGen/AsmGen/AsmMerlin32.cs | 13 +++++++ SourceGen/AsmGen/AsmTass64.cs | 6 ++++ SourceGen/AsmGen/GenCommon.cs | 11 +++--- SourceGen/AsmGen/IGenerator.cs | 13 +++++++ SourceGen/AsmGen/LabelLocalizer.cs | 27 +++++++++++---- SourceGen/RuntimeData/Help/codegen.html | 5 +++ SourceGen/SGTestData/2019-local-variables | Bin 137 -> 151 bytes .../SGTestData/2019-local-variables.dis65 | 32 +++++++++++++++--- .../Expected/2019-local-variables_64tass.S | 11 +++++- .../Expected/2019-local-variables_Merlin32.S | 9 ++++- .../Expected/2019-local-variables_acme.S | 32 +++++++++++++++++- .../Expected/2019-local-variables_cc65.S | 7 ++++ .../Expected/2019-local-variables_cc65.cfg | 2 +- .../SGTestData/Source/2019-local-variables.S | 11 ++++++ 16 files changed, 174 insertions(+), 18 deletions(-) diff --git a/SourceGen/AsmGen/AsmAcme.cs b/SourceGen/AsmGen/AsmAcme.cs index 9a5e2b0..d6f0f3a 100644 --- a/SourceGen/AsmGen/AsmAcme.cs +++ b/SourceGen/AsmGen/AsmAcme.cs @@ -320,6 +320,12 @@ namespace SourceGen.AsmGen { return string.Empty; // indicate original is fine } + // IGenerator + public FormatDescriptor ModifyInstructionOperandFormat(int offset, FormatDescriptor dfd, + int operand) { + return dfd; + } + // IGenerator public void 
UpdateCharacterEncoding(FormatDescriptor dfd) { } diff --git a/SourceGen/AsmGen/AsmCc65.cs b/SourceGen/AsmGen/AsmCc65.cs index e94a037..b6ebc06 100644 --- a/SourceGen/AsmGen/AsmCc65.cs +++ b/SourceGen/AsmGen/AsmCc65.cs @@ -217,6 +217,7 @@ namespace SourceGen.AsmGen { mLocalizer = new LabelLocalizer(Project); if (!Settings.GetBool(AppSettings.SRCGEN_DISABLE_LABEL_LOCALIZATION, false)) { mLocalizer.LocalPrefix = "@"; + mLocalizer.QuirkVariablesEndScope = true; mLocalizer.Analyze(); } @@ -346,6 +347,12 @@ namespace SourceGen.AsmGen { } } + // IGenerator + public FormatDescriptor ModifyInstructionOperandFormat(int offset, FormatDescriptor dfd, + int operand) { + return dfd; + } + // IGenerator public void UpdateCharacterEncoding(FormatDescriptor dfd) { } diff --git a/SourceGen/AsmGen/AsmMerlin32.cs b/SourceGen/AsmGen/AsmMerlin32.cs index 54c8ce7..d344076 100644 --- a/SourceGen/AsmGen/AsmMerlin32.cs +++ b/SourceGen/AsmGen/AsmMerlin32.cs @@ -367,6 +367,19 @@ namespace SourceGen.AsmGen { return string.Empty; } + // IGenerator + public FormatDescriptor ModifyInstructionOperandFormat(int offset, FormatDescriptor dfd, + int operand) { + if (dfd.FormatType == FormatDescriptor.Type.NumericLE && dfd.IsStringOrCharacter && + (operand & 0x7f) == (byte)',') { + // Merlin throws an error on comma operands, e.g. 
LDA #',' + dfd = FormatDescriptor.Create(dfd.Length, + FormatDescriptor.Type.NumericLE, FormatDescriptor.SubType.None); + } + + return dfd; + } + // IGenerator public void UpdateCharacterEncoding(FormatDescriptor dfd) { } diff --git a/SourceGen/AsmGen/AsmTass64.cs b/SourceGen/AsmGen/AsmTass64.cs index fd7e8c9..186e50c 100644 --- a/SourceGen/AsmGen/AsmTass64.cs +++ b/SourceGen/AsmGen/AsmTass64.cs @@ -341,6 +341,12 @@ namespace SourceGen.AsmGen { return string.Empty; // indicate original is fine } + // IGenerator + public FormatDescriptor ModifyInstructionOperandFormat(int offset, FormatDescriptor dfd, + int operand) { + return dfd; + } + // IGenerator public void UpdateCharacterEncoding(FormatDescriptor dfd) { CharEncoding.Encoding newEnc = PseudoOp.SubTypeToEnc(dfd.FormatSubType); diff --git a/SourceGen/AsmGen/GenCommon.cs b/SourceGen/AsmGen/GenCommon.cs index 25f7f0f..4d60800 100644 --- a/SourceGen/AsmGen/GenCommon.cs +++ b/SourceGen/AsmGen/GenCommon.cs @@ -230,14 +230,17 @@ namespace SourceGen.AsmGen { // Check Length to watch for bogus descriptors. (ApplyFormatDescriptors() should // now be screening bad descriptors out, so we may not need the Length test.) if (attr.DataDescriptor != null && attr.Length == attr.DataDescriptor.Length) { + FormatDescriptor dfd = gen.ModifyInstructionOperandFormat(offset, + attr.DataDescriptor, operand); + // Format operand as directed. if (op.AddrMode == OpDef.AddressMode.BlockMove) { // Special handling for the double-operand block move. 
string opstr1 = PseudoOp.FormatNumericOperand(formatter, proj.SymbolTable, - gen.Localizer.LabelMap, attr.DataDescriptor, operand >> 8, 1, + gen.Localizer.LabelMap, dfd, operand >> 8, 1, PseudoOp.FormatNumericOpFlags.None); string opstr2 = PseudoOp.FormatNumericOperand(formatter, proj.SymbolTable, - gen.Localizer.LabelMap, attr.DataDescriptor, operand & 0xff, 1, + gen.Localizer.LabelMap, dfd, operand & 0xff, 1, PseudoOp.FormatNumericOpFlags.None); if (gen.Quirks.BlockMoveArgsReversed) { string tmp = opstr1; @@ -248,10 +251,10 @@ namespace SourceGen.AsmGen { formattedOperand = hash + opstr1 + "," + hash + opstr2; } else { if (attr.DataDescriptor.IsStringOrCharacter) { - gen.UpdateCharacterEncoding(attr.DataDescriptor); + gen.UpdateCharacterEncoding(dfd); } formattedOperand = PseudoOp.FormatNumericOperand(formatter, proj.SymbolTable, - lvLookup, gen.Localizer.LabelMap, attr.DataDescriptor, + lvLookup, gen.Localizer.LabelMap, dfd, offset, operandForSymbol, operandLen, opFlags); } } else { diff --git a/SourceGen/AsmGen/IGenerator.cs b/SourceGen/AsmGen/IGenerator.cs index 427adde..3bc3f44 100644 --- a/SourceGen/AsmGen/IGenerator.cs +++ b/SourceGen/AsmGen/IGenerator.cs @@ -95,6 +95,19 @@ namespace SourceGen.AsmGen { /// null if the op is unsupported or broken and should be emitted as hex. string ModifyOpcode(int offset, OpDef op); + /// + /// Provides an opportunity for the assembler to replace an instruction's format + /// descriptor with another. Only called if the instruction is explicitly formatted + /// (i.e. has a non-null descriptor). + /// + /// Instruction offset. + /// Existing descriptor. + /// Operand value. + /// Replacement format descriptor. If no changes are desired, returns + /// the dfd argument. + FormatDescriptor ModifyInstructionOperandFormat(int offset, FormatDescriptor dfd, + int operand); + /// /// Allows the generator to issue character encoding update instructions for source /// files with more than one encoding. 
diff --git a/SourceGen/AsmGen/LabelLocalizer.cs b/SourceGen/AsmGen/LabelLocalizer.cs index b69bd66..a7e12af 100644 --- a/SourceGen/AsmGen/LabelLocalizer.cs +++ b/SourceGen/AsmGen/LabelLocalizer.cs @@ -130,6 +130,11 @@ namespace SourceGen.AsmGen { /// public string LocalPrefix { get; set; } + /// + /// Set this if the declaration of a local variable ends the current scope. + /// + public bool QuirkVariablesEndScope { get; set; } + /// /// Project reference. /// @@ -230,8 +235,18 @@ namespace SourceGen.AsmGen { bool first = true; - for (int i = 0; i < mProject.FileDataLength; i++) { - Symbol sym = mProject.GetAnattrib(i).Symbol; + for (int offset = 0; offset < mProject.FileDataLength; offset++) { + Symbol sym = mProject.GetAnattrib(offset).Symbol; + + // In cc65, variable declarations end the local label scope. We insert a + // fake global symbol if we encounter a table with a nonzero number of entries. + if (QuirkVariablesEndScope && + mProject.LvTables.TryGetValue(offset, out LocalVariableTable value) && + value.Count > 0) { + mGlobalFlags[offset] = true; + mGlobalLabels.Add(new OffsetLabel(offset, "!VARTAB!")); + continue; + } if (sym == null) { // No label at this offset. continue; @@ -239,22 +254,22 @@ namespace SourceGen.AsmGen { if (first || sym.SymbolType != Symbol.Type.LocalOrGlobalAddr) { first = false; - mGlobalFlags[i] = true; - mGlobalLabels.Add(new OffsetLabel(i, sym.Label)); + mGlobalFlags[offset] = true; + mGlobalLabels.Add(new OffsetLabel(offset, sym.Label)); // Don't add to pairs list. continue; } // If nothing actually references this label, the xref set will be empty. 
- XrefSet xrefs = mProject.GetXrefSet(i); + XrefSet xrefs = mProject.GetXrefSet(offset); if (xrefs != null) { foreach (XrefSet.Xref xref in xrefs) { if (!xref.IsByName) { continue; } - mOffsetPairs.Add(new OffsetPair(xref.Offset, i)); + mOffsetPairs.Add(new OffsetPair(xref.Offset, offset)); } } } diff --git a/SourceGen/RuntimeData/Help/codegen.html b/SourceGen/RuntimeData/Help/codegen.html index 4115e2e..784def8 100644 --- a/SourceGen/RuntimeData/Help/codegen.html +++ b/SourceGen/RuntimeData/Help/codegen.html @@ -251,6 +251,9 @@ code, but also needs to know how to handle the corner cases.

where the labels are used, the assembler will already have generated them as absolute values. Width disambiguation must be applied to operands that wouldn't be ambiguous to a multi-pass assembler. +
  • Assignment of constants and variables (= and .set) ends local label scope, so the label localizer has to take variable assignment into account.
  • The assembler is geared toward generating relocatable code with multiple segments (it is, after all, an assembler for a C compiler). A linker configuration script is expected to be provided for anything @@ -269,6 +272,8 @@ code, but also needs to know how to handle the corner cases.