6502bench/SourceGen/DataAnalysis.cs

/*
 * Copyright 2019 faddenSoft
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
using System;
using System.Diagnostics;

using Asm65;
using CommonUtil;
using TextScanMode = SourceGen.ProjectProperties.AnalysisParameters.TextScanMode;

namespace SourceGen {
    /// <summary>
    /// Auto-detection of structured data.
    /// 
    /// This class doesn't really hold any state.  It's just a convenient place to collect
    /// the items needed by the analyzer methods.
    /// </summary>
    public class DataAnalysis {
        /// <summary>
        /// Minimum number of consecutive identical bytes for something to be called a "run".
        /// </summary>
        private const int MIN_RUN_LENGTH = 5;

        /// <summary>
        /// Minimum length for treating data as a run if the byte is a printable character.
        /// (Alternatively, the maximum length of a character string composed of a single value.)
        /// Anything shorter than this is handled with a string directive, anything this long or
        /// longer becomes FILL.  This should be larger than the MinCharsForString parameter.
        /// </summary>
        private const int MAX_STRING_RUN_LENGTH = 62;

        /// <summary>
        /// Absolute minimum string length for auto-detection.  This is used when generating the
        /// data tables.
        /// </summary>
        public const int MIN_STRING_LENGTH = 3;

        /// <summary>
        /// Minimum length for an ASCII string.  Anything shorter is just output as bytes.
        /// This is the default value; the actual value is configured as a project preference.
        /// </summary>
        public const int DEFAULT_MIN_STRING_LENGTH = 4;

        /// <summary>
        /// Sentinel value (int.MaxValue): set min chars to this to disable string detection.
        /// </summary>
        public const int MIN_CHARS_FOR_STRING_DISABLED = int.MaxValue;

        /// <summary>
        /// Project with which we are associated.  Supplied to the constructor.
        /// </summary>
        private DisasmProject mProject;

        /// <summary>
        /// Reference to 65xx data.  This is the project's FileData array, captured by the
        /// constructor.
        /// </summary>
        private byte[] mFileData;

        /// <summary>
        /// Attributes, one per byte in input file.  Supplied to the constructor; the analysis
        /// methods update entries in this array in place.
        /// </summary>
        private Anattrib[] mAnattribs;

        /// <summary>
        /// Configurable parameters.  Taken from the project's ProjectProps.AnalysisParams by
        /// the constructor.
        /// </summary>
        private ProjectProperties.AnalysisParameters mAnalysisParams;


        /// <summary>
        /// Destination for debug trace messages.  Initialized to a log constructed with
        /// Silent priority; a different log can be installed through the write-only
        /// DebugLog property.
        /// </summary>
        private DebugLog mDebugLog = new DebugLog(DebugLog.Priority.Silent);
        public DebugLog DebugLog {
            set => mDebugLog = value;
        }


        /// <summary>
        /// Creates an analyzer bound to a project and its attribute array.
        /// </summary>
        /// <param name="proj">Project to analyze.  The file data and analysis parameters
        ///   are read from it once, here.</param>
        /// <param name="anattribs">Anattrib array.</param>
        public DataAnalysis(DisasmProject proj, Anattrib[] anattribs) {
            // Values obtained from the project.
            mProject = proj;
            mFileData = proj.FileData;
            mAnalysisParams = proj.ProjectProps.AnalysisParams;

            // Per-byte attributes, provided by the caller.
            mAnattribs = anattribs;
        }

        // Internal logging helpers, one per priority level.  Each checks whether the level
        // is loggable and, if so, emits the message prefixed with '+' and the file offset
        // in hex (zero-padded to at least six digits).
        //
        // If the cost of building strings at the call sites becomes a concern, these can be
        // compiled down to empty methods in release builds, which should allow the compiler
        // to elide the concatenation.  The verbose variant is currently disabled that way.
#if false
        private void LogV(int offset, string msg) {
            if (!mDebugLog.IsLoggable(DebugLog.Priority.Verbose)) {
                return;
            }
            mDebugLog.LogV($"+{offset:x6} {msg}");
        }
#else
        private void LogV(int offset, string msg) { }
#endif
        private void LogD(int offset, string msg) {
            if (!mDebugLog.IsLoggable(DebugLog.Priority.Debug)) {
                return;
            }
            mDebugLog.LogD($"+{offset:x6} {msg}");
        }
        private void LogI(int offset, string msg) {
            if (!mDebugLog.IsLoggable(DebugLog.Priority.Info)) {
                return;
            }
            mDebugLog.LogI($"+{offset:x6} {msg}");
        }
        private void LogW(int offset, string msg) {
            if (!mDebugLog.IsLoggable(DebugLog.Priority.Warning)) {
                return;
            }
            mDebugLog.LogW($"+{offset:x6} {msg}");
        }
        private void LogE(int offset, string msg) {
            if (!mDebugLog.IsLoggable(DebugLog.Priority.Error)) {
                return;
            }
            mDebugLog.LogE($"+{offset:x6} {msg}");
        }

        /// <summary>
        /// Analyzes instruction operands and Address data descriptors to identify references
        /// to offsets within the file.
        ///
        /// Instructions with format descriptors are left alone.  Instructions with
        /// operand offsets but no descriptor will have a descriptor generated
        /// using the label at the target offset; if the target offset is unlabeled,
        /// a unique label will be generated.  Data descriptors with type=Address are
        /// handled the same way.
        ///
        /// In some cases, such as a reference to the middle of an instruction, we will
        /// label a nearby location instead.
        ///
        /// If the UseRelocData analysis parameter is set, entries in the project's
        /// relocation list take precedence over the operand offset determined by the
        /// code analyzer, and unformatted bytes covered by a relocation entry are turned
        /// into symbolic references.
        ///
        /// This should be called after code analysis has run, user labels and format
        /// descriptors have been applied, and platform/project symbols have been merged
        /// into the symbol table.
        /// </summary>
        /// <remarks>
        /// Nothing is returned.  Results are recorded by updating entries in the Anattrib
        /// array and, when a label has to be generated, the project's symbol table.
        /// </remarks>
        public void AnalyzeDataTargets() {
            mDebugLog.LogI("Analyzing data targets...");

            for (int offset = 0; offset < mAnattribs.Length; offset++) {
                Anattrib attr = mAnattribs[offset];
                if (attr.IsInstructionStart) {
                    if (attr.DataDescriptor != null) {
                        // It's being shown as numeric, or as a reference to some other symbol.
                        // Either way there's nothing further for us to do.  (Technically we
                        // would want to treat it like the no-descriptor case if the type was
                        // numeric/Address, but we don't allow that for instructions.)
                        //
                        // Project and platform symbols are applied later.

                        // (This assert is bogus -- this is possible with a bad bit of formatting.
                        // One way this can occur semi-naturally is to follow a JSR with a 16-bit
                        // value that evaluates to a pair of "illegal" instructions, which are
                        // then formatted by the user as a 16-bit address without tagging as
                        // inline data.  Enabling undocumented 6502 instructions throws it off.)
                        //Debug.Assert(attr.DataDescriptor.FormatSubType !=
                        //    FormatDescriptor.SubType.Address);
                        continue;
                    }

                    // Check for a relocation.  It'll be at offset+1 because it's on the operand,
                    // not the opcode byte.  (Make sure to check the length, or an RTS followed
                    // by relocated data will freak out.)
                    //
                    // We don't check for embedded instructions here.  If that did somehow happen,
                    // it's probably intentional, so we should do the replacement.
                    //
                    // TODO(someday): this won't get the second byte of an MVN/MVP, which is fine
                    // since we don't currently support two formats on one instruction.
                    if (mAnalysisParams.UseRelocData) {
                        if (attr.Length > 1 && mProject.RelocList.TryGetValue(offset + 1,
                                    out DisasmProject.RelocData reloc) &&
                                attr.Length > reloc.Width) {
                            // The relocation address differs from what the analyzer came up
                            // with.  This may be because of incorrect assumptions about the
                            // bank (assuming B==K) or because the partial address refers to
                            // a location outside the file bounds.  Whatever the case, if the
                            // address is different, attr.OperandOffset will also be different.
                            int relOperandOffset = mProject.AddrMap.AddressToOffset(offset,
                                reloc.Value);
                            if (relOperandOffset >= 0) {
                                // Determined a different offset.  Use that instead.
                                //Debug.WriteLine("REL +" + offset.ToString("x6") + " " +
                                //    reloc.Value.ToString("x6") + " vs. " +
                                //    attr.OperandAddress.ToString("x6"));
                                WeakSymbolRef.Part part = ShiftToPart(reloc.Shift);
                                SetDataTarget(offset, attr.Length, relOperandOffset, part);
                                continue;
                            }
                        }

                        // No reloc for this instruction.  If it's a relative branch we need
                        // to do the usual stuff, but if it's a PEA we want to treat it like
                        // an immediate value.  It should also be safe and useful to halt
                        // processing for "LDA abs" and the like.
                        OpDef op = mProject.CpuDef.GetOpDef(mProject.FileData[offset]);
                        bool stopHere = false;
                        switch (op.AddrMode) {
                            case OpDef.AddressMode.StackAbs:    // PEA
                            case OpDef.AddressMode.Abs:         // technically just non-PBR
                            case OpDef.AddressMode.AbsIndexX:
                            case OpDef.AddressMode.AbsIndexY:
                                stopHere = true;
                                break;
                            // AbsIndexXInd, AbsInd, AbsIndLong look like absolute addresses
                            // but use the program bank or bank 0.  They're unambiguous even
                            // without reloc data, so no need to block them.  That also goes
                            // for long addressing: ideally they'd have reloc data, but even if
                            // they don't, we might as well hook up a symbol because they can't
                            // mean anything else.  (I think.)
                        }
                        if (stopHere) {
                            continue;
                        }
                    }

                    int operandOffset = attr.OperandOffset;
                    if (operandOffset >= 0) {
                        // This is an offset reference: a branch or data access instruction
                        // whose target is inside the file.  Create a FormatDescriptor for it,
                        // and generate a label at the target if one is not already present.
                        SetDataTarget(offset, attr.Length, operandOffset, WeakSymbolRef.Part.Low);
                    }

                    // We advance by a single byte, rather than .Length, in case there's
                    // an instruction embedded inside another one.
                } else if (attr.DataDescriptor != null) {
                    // We can't check IsDataStart / IsInlineDataStart because the bytes might
                    // still be uncategorized.  If there's a user-specified format, check it
                    // to see if it's an address.
                    FormatDescriptor dfd = attr.DataDescriptor;

                    // Is this numeric/Address?
                    if ((dfd.FormatType == FormatDescriptor.Type.NumericLE ||
                            dfd.FormatType == FormatDescriptor.Type.NumericBE) &&
                            dfd.FormatSubType == FormatDescriptor.SubType.Address) {
                        // Treat like an absolute address.  Convert the operand
                        // to an address, then resolve the file offset.
                        int address = RawData.GetWord(mFileData, offset, dfd.Length,
                                (dfd.FormatType == FormatDescriptor.Type.NumericBE));
                        if (dfd.Length < 3) {
                            // Bank not specified by data, add current program bank.  Not always
                            // correct, but should be often enough.  In most cases we'd just
                            // assume a correct data bank register, but here we need to find
                            // a file offset, so we have to assume data bank == program bank
                            // (unless we find a good way to track the data bank register).
                            address |= attr.Address & 0x7fff0000;
                        }
                        int operandOffset = mProject.AddrMap.AddressToOffset(offset, address);
                        if (operandOffset >= 0) {
                            SetDataTarget(offset, dfd.Length, operandOffset,
                                WeakSymbolRef.Part.Low);
                        }
                    }

                    // For other formats, we don't need to do anything.  Numeric/Address is
                    // the only one that represents an offset reference.  Numeric/Symbol
                    // is a name reference.  The others are just data.

                    // There shouldn't be any data items inside other data items, so we
                    // can just skip forward.  (If SetDataTarget replaced the descriptor, the
                    // replacement was created with the same length.)
                    offset += mAnattribs[offset].DataDescriptor.Length - 1;
                } else if (mAnalysisParams.UseRelocData && attr.IsUntyped &&
                        mProject.RelocList.TryGetValue(offset,
                            out DisasmProject.RelocData reloc)) {
                    // Byte is unformatted, but there's relocation data here.  If the full
                    // range of bytes is unformatted and unlabeled, create a symbolic reference.
                    // TODO: we can do better here when a multi-byte reloc has an auto-generated
                    // label mid-way through: create multiple, smaller formats for the same sym.
                    // Or don't generate auto labels until all reloc-based formats are placed.
                    //
                    // A relocation entry whose width runs past the end of the file can't be
                    // formatted.  Treat that as "not clear" instead of indexing beyond the
                    // end of the attribute array.
                    bool allClear = (offset + reloc.Width <= mAnattribs.Length);
                    for (int i = 1; allClear && i < reloc.Width; i++) {
                        if (!mAnattribs[offset + i].IsUntyped ||
                                mAnattribs[offset + i].DataDescriptor != null ||
                                mAnattribs[offset + i].Symbol != null) {
                            allClear = false;
                            break;
                        }
                    }
                    if (allClear) {
                        int operandOffset = mProject.AddrMap.AddressToOffset(offset, reloc.Value);
                        if (operandOffset >= 0) {
                            //Debug.WriteLine("DREL +" + offset.ToString("x6") + " val=" +
                            //    reloc.Value.ToString("x6") +
                            //    " opOff=" + operandOffset.ToString("x6"));
                            SetDataTarget(offset, reloc.Width, operandOffset,
                                ShiftToPart(reloc.Shift));
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Maps a relocation shift value to the symbol part it selects.
        /// </summary>
        /// <param name="shift">Shift value from a relocation entry.</param>
        /// <returns>Bank for a shift of -16, High for -8, and Low for any other
        ///   value.</returns>
        private static WeakSymbolRef.Part ShiftToPart(int shift) {
            switch (shift) {
                case -16:
                    return WeakSymbolRef.Part.Bank;
                case -8:
                    return WeakSymbolRef.Part.High;
                default:
                    return WeakSymbolRef.Part.Low;
            }
        }

        /// <summary>
        /// Resolves the value stored in a data item to a file offset.  A result is only
        /// produced for items formatted as numeric/Address or numeric/Symbol.
        /// </summary>
        /// <param name="proj">Project reference.</param>
        /// <param name="offset">Offset of data item.</param>
        /// <returns>Operand offset, or -1 if not applicable.</returns>
        public static int GetDataOperandOffset(DisasmProject proj, int offset) {
            Anattrib itemAttr = proj.GetAnattrib(offset);
            bool isItemStart = itemAttr.IsDataStart || itemAttr.IsInlineDataStart;
            if (!isItemStart) {
                return -1;
            }

            FormatDescriptor desc = itemAttr.DataDescriptor;

            // The item must be numeric, in either byte order...
            bool bigEndian = (desc.FormatType == FormatDescriptor.Type.NumericBE);
            if (!bigEndian && desc.FormatType != FormatDescriptor.Type.NumericLE) {
                return -1;
            }
            // ...and must be displayed as an address or a symbol.
            if (desc.FormatSubType != FormatDescriptor.SubType.Address &&
                    desc.FormatSubType != FormatDescriptor.SubType.Symbol) {
                return -1;
            }

            // Read the item's value and treat it as an absolute address.
            int address = RawData.GetWord(proj.FileData, offset, desc.Length, bigEndian);
            if (desc.Length < 3) {
                // The data doesn't include a bank byte.  Substitute the bank of the item's
                // own address where the data bank should go; not perfect, but there's
                // nothing better available at the moment.
                address |= (itemAttr.Address & 0x7fff0000);
            }

            // Convert the address to a file offset.
            return proj.AddrMap.AddressToOffset(offset, address);
        }

        /// <summary>
        /// Finds the offset of the first byte of the item that contains the specified offset.
        /// If the byte at that offset is already the start of a code/data/inline-data item,
        /// it is returned unchanged; otherwise the search steps backward one byte at a time
        /// until a start is found.
        /// </summary>
        /// <param name="proj">Project reference.</param>
        /// <param name="offset">Start offset.</param>
        /// <returns>Base offset.</returns>
        public static int GetBaseOperandOffset(DisasmProject proj, int offset) {
            Debug.Assert(offset >= 0 && offset < proj.FileDataLength);

            int candidate = offset;
            while (true) {
                if (proj.GetAnattrib(candidate).IsStart) {
                    return candidate;
                }
                candidate--;

                // We're in the middle of something, so it should not be possible to
                // walk off the top of the list.
                Debug.Assert(candidate >= 0);
            }
        }

        /// <summary>
        /// Creates a FormatDescriptor in the Anattrib array at srcOffset that links to
        /// targetOffset, or a nearby label.  If targetOffset doesn't have a useful label,
        /// one will be generated and added to the project's symbol table.
        ///
        /// This is used for both instruction and data operands.
        /// </summary>
        /// <param name="srcOffset">Offset of instruction or address data.</param>
        /// <param name="srcLen">Length of instruction or data item.</param>
        /// <param name="targetOffset">Offset of target.</param>
        /// <param name="part">Symbol part (low, high, or bank) to record in the weak symbol
        ///   reference that is created.</param>
        private void SetDataTarget(int srcOffset, int srcLen, int targetOffset,
                WeakSymbolRef.Part part) {
            // NOTE: don't try to cache mAnattribs[targetOffset] -- we may be changing
            // targetOffset and/or altering the Anattrib entry, so grabbing a copy of the
            // struct may lead to problems.

            // If the target offset has a symbol assigned, use it.  Otherwise, try to
            // find something nearby that might be more appropriate.
            //
            // The original target is remembered only so the log messages below can report
            // how far the reference was adjusted.
            int origTargetOffset = targetOffset;
            if (mAnattribs[targetOffset].Symbol is null) {
                if (mAnalysisParams.SeekNearbyTargets) {
                    targetOffset = FindAlternateTarget(srcOffset, targetOffset);
                }

                // If we're not interested in seeking nearby targets, or we are but we failed
                // to find something useful, we need to make sure that we're not pointing
                // into the middle of the instruction.  The assembler will only see labels on
                // the opcode bytes, so if we're pointing at the middle we need to back up.
                if (mAnattribs[targetOffset].IsInstruction &&
                        !mAnattribs[targetOffset].IsInstructionStart) {
                    while (!mAnattribs[--targetOffset].IsInstructionStart) {
                        // Should not be possible to move past the start of the file,
                        // since we know we're in the middle of an instruction.
                        Debug.Assert(targetOffset > 0);
                    }
                } else if (!mAnattribs[targetOffset].IsInstruction &&
                            !mAnattribs[targetOffset].IsStart) {
                    // This is not part of an instruction, and is not the start of a formatted
                    // data area.  However, it might be part of a formatted data area, in which
                    // case we need to avoid creating an auto label in the middle.  So we seek
                    // backward, looking for the first offset with a descriptor.  If that
                    // descriptor includes this offset, we set the target offset to that.
                    // (Note the uncategorized data pass hasn't run yet, so only instructions
                    // and offsets identified by users or scripts have been categorized.)
                    //
                    // ?? Can we use GetBaseOperandOffset(), which searches for IsStart?
                    //
                    // TODO(performance): we spend a significant amount of time in this loop.
                    int scanOffset = targetOffset;
                    while (--scanOffset >= 0) {
                        FormatDescriptor dfd = mAnattribs[scanOffset].DataDescriptor;
                        if (!(dfd is null)) {
                            if (scanOffset + dfd.Length > targetOffset) {
                                // Found a descriptor that encompasses target offset.  Adjust
                                // target to point at the start of the region.
                                targetOffset = scanOffset;
                            }
                            // Descriptors aren't allowed to overlap, so either way we're done.
                            break;
                        }
                    }
                }
            }

            // Re-check the symbol: targetOffset may have been moved above, and the new
            // location might already carry a label.
            if (mAnattribs[targetOffset].Symbol == null) {
                // No label at target offset, generate one.
                //
                // Generally speaking, the label we generate will be unique, because it
                // incorporates the address.  It's possible through various means to end
                // up with a user or platform label that matches an auto label, so we
                // need to do some renaming in that case.  Shouldn't happen often.
                Symbol sym = AutoLabel.GenerateUniqueForAddress(mAnattribs[targetOffset].Address,
                    mProject.SymbolTable, "L");
                mAnattribs[targetOffset].Symbol = sym;
                // This will throw if the symbol already exists.  That is the desired
                // behavior, as that would be a bug.
                mProject.SymbolTable.Add(sym);
            }

            // Create a Numeric/Symbol descriptor that references the target label.  If the
            // source offset already had a descriptor (e.g. Numeric/Address data item),
            // this will replace it in the Anattrib array.  (The user-specified format
            // is unaffected.)
            //
            // Doing this by target symbol, rather than offset in a Numeric/Address item,
            // allows us to avoid carrying the adjustment stuff everywhere.  OTOH we have
            // to manually refactor label renames in the display list if we don't want to
            // redo the data analysis.
            //
            // The new descriptor is little-endian unless the descriptor being replaced was
            // big-endian numeric, in which case the byte order is carried over.
            bool isBigEndian = false;
            if (mAnattribs[srcOffset].DataDescriptor != null) {
                LogD(srcOffset, "Replacing " + mAnattribs[srcOffset].DataDescriptor +
                    " with reference to " + mAnattribs[targetOffset].Symbol.Label +
                    ", adj=" + (origTargetOffset - targetOffset));
                if (mAnattribs[srcOffset].DataDescriptor.FormatType ==
                        FormatDescriptor.Type.NumericBE) {
                    isBigEndian = true;
                }
            } else {
                LogV(srcOffset, "Creating weak reference to label " +
                    mAnattribs[targetOffset].Symbol.Label +
                    ", adj=" + (origTargetOffset - targetOffset));
            }
            mAnattribs[srcOffset].DataDescriptor = FormatDescriptor.Create(srcLen,
                new WeakSymbolRef(mAnattribs[targetOffset].Symbol.Label, part), isBigEndian);
        }

        /// <summary>
        /// Given a reference from srcOffset to targetOffset, check to see if there's a
        /// nearby location that we'd prefer to refer to.  For example, if targetOffset points
        /// into the middle of an instruction, we'd rather have it refer to the first byte.
        ///
        /// Three situations are handled separately: the target is not part of an instruction
        /// (look for a nearby user label), the source is not an instruction start (possible
        /// address-1 reference to code), and an instruction referencing an instruction
        /// (branch, address push, or data operation).
        /// </summary>
        /// <param name="srcOffset">Reference source.</param>
        /// <param name="targetOffset">Reference target.</param>
        /// <returns>New value for targetOffset, or original value if nothing better was
        ///   found.</returns>
        private int FindAlternateTarget(int srcOffset, int targetOffset) {
            // Kept so the amount of adjustment can be logged at the end.
            int origTargetOffset = targetOffset;

            // Is the target outside the instruction stream?  If it's just referencing data,
            // do a simple check and move on.
            if (!mAnattribs[targetOffset].IsInstruction) {
                // We want to use user-defined labels whenever possible.  If they're accessing
                // memory within a few bytes, use that.  We don't want to do this for
                // code references, though, or our branches will get all weird.
                //
                // We look a few back and one forward.  Stuff backward (which turns into
                // LABEL+N) has priority over forward (which becomes LABEL-N).
                //
                // TODO(someday): make parameters user-configurable?
                const int MAX_FWD = 1;
                const int MAX_BACK = 3;
                int probeOffset = targetOffset;
                bool back = true;
                // Each pass through the loop moves probeOffset one step in the current
                // direction and examines that offset.  The backward phase runs first; when
                // it is exhausted the probe is reset to the target and the forward phase
                // begins within the same pass.
                while (true) {
                    if (back) {
                        // moving backward
                        probeOffset--;
                        if (probeOffset < 0 || probeOffset < targetOffset - MAX_BACK) {
                            // too far back, reverse direction
                            probeOffset = targetOffset;
                            back = false;
                        }
                    }
                    if (!back) {
                        // moving forward
                        probeOffset++;
                        if (probeOffset >= mAnattribs.Length ||
                                probeOffset > targetOffset + MAX_FWD) {
                            break;  // done
                        }
                    }

                    // Only user-defined labels are of interest here.
                    Symbol sym = mAnattribs[probeOffset].Symbol;
                    if (sym != null && sym.SymbolSource == Symbol.Source.User) {
                        // Found a nearby user label.  Make sure it's actually nearby: the
                        // difference in addresses must equal the difference in file offsets.
                        int addrDiff = mAnattribs[targetOffset].Address -
                                mAnattribs[probeOffset].Address;
                        if (addrDiff == targetOffset - probeOffset) {
                            targetOffset = probeOffset;
                            break;
                        } else {
                            Debug.WriteLine("NOT probing past address boundary change (src=+" +
                                srcOffset.ToString("x6") +
                                " targ=+" + targetOffset.ToString("x6") +
                                " probe=+" + probeOffset.ToString("x6") + ")");

                            // No point in continuing to search this direction, but we might
                            // need to look the other way.
                            if (back) {
                                probeOffset = targetOffset;
                                back = false;
                            } else {
                                break;
                            }
                        }
                    }
                }
                return targetOffset;
            }

            // Target is an instruction.  Is the source an instruction or data element
            // (e.g. ".dd2 <addr>").
            if (!mAnattribs[srcOffset].IsInstructionStart) {
                // Might be address-1 to set up an RTS.  If the target address isn't
                // an instruction start, check to see if the following byte is.
                if (!mAnattribs[targetOffset].IsInstructionStart &&
                        targetOffset + 1 < mAnattribs.Length &&
                        mAnattribs[targetOffset + 1].IsInstructionStart) {
                    LogD(srcOffset, "Offsetting address reference");
                    targetOffset++;
                }
                return targetOffset;
            }

            // Source is an instruction, so we have an instruction referencing an instruction.
            // Could be a branch, an address push, or self-modifying code.
            OpDef op = mProject.CpuDef.GetOpDef(mProject.FileData[srcOffset]);
            if (op.IsBranchOrSubCall) {
                // Don't mess with jumps and branches -- always go directly to the
                // target address.
            } else if (op == OpDef.OpPEA_StackAbs || op == OpDef.OpPER_StackPCRelLong) {
                // They might be pushing address-1 to set up an RTS.  If the target address isn't
                // an instruction start, check to see if the following byte is.
                if (!mAnattribs[targetOffset].IsInstructionStart &&
                        targetOffset + 1 < mAnattribs.Length &&
                        mAnattribs[targetOffset + 1].IsInstructionStart) {
                    LogD(srcOffset, "Offsetting PEA/PER");
                    targetOffset++;
                }
            } else {
                // Data operation (LDA, STA, etc).  This could be self-modifying code, or
                // an indexed access with an offset base address (LDA addr-1,Y) to an
                // adjacent data area.  Check to see if there's data right after this.
                //
                // The one or two bytes following the target are examined; the first one
                // that isn't part of an instruction becomes the new target.
                bool nearbyData = false;
                for (int i = targetOffset + 1; i <= targetOffset + 2; i++) {
                    if (i < mAnattribs.Length && !mAnattribs[i].IsInstruction) {
                        targetOffset = i;
                        nearbyData = true;
                        break;
                    }
                }
                if (!nearbyData && !mAnattribs[targetOffset].IsInstructionStart) {
                    // There's no data nearby, and the target is not the start of the
                    // instruction, so this is probably self-modifying code.  We want
                    // the label to be on the opcode, so back up to the instruction start.
                    while (!mAnattribs[--targetOffset].IsInstructionStart) {
                        // Should not be possible to move past the start of the file,
                        // since we know we're in the middle of an instruction.
                        Debug.Assert(targetOffset > 0);
                    }
                }
            }

            if (targetOffset != origTargetOffset) {
                LogV(srcOffset, "Creating instruction ref adj=" +
                    (origTargetOffset - targetOffset));
            }

            return targetOffset;
        }

        /// <summary>
        /// Scans the entire file for contiguous stretches of bytes that are neither
        /// instructions, inline data, nor already-formatted data, and hands each stretch
        /// to AnalyzeRange() so it can be matched against common data patterns.
        ///
        /// This runs again after most changes to the project, so it needs to stay cheap.
        /// </summary>
        public void AnalyzeUncategorized() {
            FormatDescriptor singleByteDesc = FormatDescriptor.Create(1,
                FormatDescriptor.Type.Default, FormatDescriptor.SubType.None);
            FormatDescriptor.DebugPrefabBump(-1);

            // First pass: positively tag everything that isn't an instruction or inline
            // data as "data".  (This should be the only place outside of CodeAnalysis that
            // sets the flag.)  Not strictly required, but it makes later assertions
            // possible.  Inline data without a descriptor gets the one-byte default; no
            // further analysis is attempted on it.
            for (int idx = 0; idx < mAnattribs.Length; idx++) {
                Anattrib entry = mAnattribs[idx];
                if (!entry.IsInlineData) {
                    if (!entry.IsInstruction) {
                        mAnattribs[idx].IsData = true;
                    }
                    continue;
                }
                if (entry.DataDescriptor == null) {
                    mAnattribs[idx].DataDescriptor = singleByteDesc;
                    FormatDescriptor.DebugPrefabBump();
                }
            }

            mDebugLog.LogI("Analyzing uncategorized data...");

            // Second pass: accumulate uncategorized offsets into a region, flushing the
            // region to AnalyzeRange() whenever something interrupts it.
            int pendingStart = -1;      // first offset of the open region, or -1 if none
            int offset = 0;
            while (offset < mAnattribs.Length) {
                Anattrib attr = mAnattribs[offset];

                if (attr.IsInstruction || attr.IsInlineData || attr.IsDataStart) {
                    // Something categorized lives here.  Flush whatever we gathered,
                    // then step over it.
                    if (pendingStart >= 0) {
                        AnalyzeRange(pendingStart, offset - 1);
                        pendingStart = -1;
                    }
                    if (!attr.IsInstruction) {
                        Debug.Assert(attr.Length > 0);
                        offset += attr.Length;
                    } else {
                        // Embedded instructions mean we can't just skip the whole
                        // instruction, so advance a single byte.
                        offset++;
                    }
                    continue;
                }

                if (attr.Symbol != null || mProject.HasCommentNoteOrVis(offset)) {
                    // Uncategorized, but a label/comment/note/visualization is attached.
                    // Start a fresh region here so the annotation ends up at the head of
                    // a chunk instead of being buried inside one.
                    //
                    // GroupedOffsetSetFromSelected() in ProjectView does something similar
                    // for user selections; the two are independent but should behave
                    // alike.
                    if (pendingStart >= 0) {
                        AnalyzeRange(pendingStart, offset - 1);
                    }
                    pendingStart = offset;
                    offset++;
                    continue;
                }

                // Plain uncategorized byte: open a region if needed and keep collecting.
                if (pendingStart < 0) {
                    pendingStart = offset;
                }
                offset++;

                // If the byte we just consumed and the next one aren't in a single
                // address range, there must be an ORG between them.  Close the region.
                bool moreBytes = offset < mAnattribs.Length;
                if (moreBytes && !mProject.AddrMap.IsSingleAddrRange(offset - 1, 2)) {
                    AnalyzeRange(pendingStart, offset - 1);
                    pendingStart = -1;
                }
            }

            // Flush the trailing region, if one is still open.
            if (pendingStart >= 0) {
                AnalyzeRange(pendingStart, mAnattribs.Length - 1);
            }
        }

        /// <summary>
        /// Analyzes a range of bytes, looking for opportunities to promote uncategorized
        /// data to a more structured form.
        ///
        /// Every offset in the range is consumed: each position becomes the start of a
        /// character string descriptor, the start of a fill (run) descriptor, or a
        /// single-byte default descriptor.  Results are written to the DataDescriptor
        /// field of the corresponding mAnattribs entries.
        ///
        /// If AnalyzeUncategorizedData is disabled in the analysis parameters, no pattern
        /// matching is done and every byte gets the single-byte default descriptor.
        /// </summary>
        /// <param name="start">Offset of first byte in range.</param>
        /// <param name="end">Offset of last byte in range.</param>
        private void AnalyzeRange(int start, int end) {
            // We want to identify runs of identical bytes, and runs of more than N human-
            // readable characters (ASCII, high ASCII, PETSCII, whatever).  There are a few
            // ways to do this.
            //
            // The simple approach is to walk through the data from start to end, checking at
            // each offset for runs of bytes matching the criteria.  Because the data doesn't
            // change, we can pre-analyze the data at project load time to speed things up.
            //
            // One approach is to put runs into TypedRangeSet (setting the type to the byte
            // value so a run of 0x00 doesn't merge into an adjacent run of 0x01), and the
            // various character encodings into individual RangeSets.  Then, for any given
            // byte address, you can query the length of a potential run directly.  This could
            // be made faster with a mergesort-like algorithm that walked through the various
            // range sets, rather than iterating over every byte in the range.  However, the
            // ranges passed into this method tend to be small, so the initial setup time for
            // each region can dominate the performance.  (The optimized implementation of this
            // approach is also fairly complicated.)
            //
            // A memory-hungry alternative is to create arrays of integers, one entry per byte
            // in the file, and set each entry to the number of bytes in the run that would
            // follow at that point.  So if a run of 20 zeroes began at offset 5, you would
            // set run[5]=20, run[6]=19, and so on.  That avoids searching in the sets, at the
            // cost of potentially several megabytes for a large 65816 file.
            //
            // It's even possible that Regex would handle this faster and more easily.  This
            // can be done fairly quickly with "unsafe" code, e.g.:
            //   https://stackoverflow.com/questions/3028768/net-regular-expressions-on-bytes-instead-of-chars
            //   https://stackoverflow.com/questions/1660694/regular-expression-to-match-any-character-being-repeated-more-than-10-times
            //
            // Ultimately we're just not spending that much time here.  Setting
            // AnalyzeUncategorizedData=false reveals that most of the time is spent in
            // the caller, identifying the regions, so a significant improvement here won't
            // have much impact on the user experience.
            //
            // Vague idea: figure out how to re-use the results from the previous analysis
            // pass.  At a superficial level we can cache the result of calling here with a
            // particular (start, end) pair.  At a higher level we may be able to avoid
            // the search for uncategorized data, certainly at the bank level, possibly within
            // a bank.

            mDebugLog.LogI("Analyzing [+" + start.ToString("x6") + ",+" + end.ToString("x6") +"]");

            // Shared descriptor for bytes that don't match any pattern.
            // NOTE(review): the DebugPrefabBump(-1) / DebugPrefabBump() pairing looks like
            // debug-only accounting that counts the descriptor once per actual use rather
            // than once per Create() call -- confirm against FormatDescriptor.
            FormatDescriptor oneByteDefault = FormatDescriptor.Create(1,
                        FormatDescriptor.Type.Default, FormatDescriptor.SubType.None);
            FormatDescriptor.DebugPrefabBump(-1);
            if (!mAnalysisParams.AnalyzeUncategorizedData) {
                // Analysis is disabled, so just mark everything as single-byte data.
                while (start <= end) {
                    mAnattribs[start].DataDescriptor = oneByteDefault;
                    FormatDescriptor.DebugPrefabBump();
                    start++;
                }
                return;
            }

            // Shortest character sequence that will be turned into a string.
            int minStringChars = mAnalysisParams.MinCharsForString;

            // The DATA_PRESCAN branch below is an alternate implementation built on
            // precomputed range sets.  It is compiled out unless the symbol is defined.
            // NOTE(review): it refers to MIN_RUN_LENGTH_ASCII, which is not among the
            // constants visible at the top of this class (MAX_STRING_RUN_LENGTH appears to
            // be the current name), so it probably needs updating before being re-enabled.
#if DATA_PRESCAN   // this is actually slower (and uses more memory)
            while (start <= end) {
                // This is used to let us skip forward.  It starts past the end of the block,
                // and moves backward as we identify potential points of interest.
                int minNextStart = end + 1;

                bool found = mProject.RepeatedBytes.GetContainingOrSubsequentRange(start,
                        out TypedRangeSet.TypedRange tyRange);
                if (found) {
                    if (tyRange.Low <= start) {
                        // found a matching range
                        Debug.Assert(tyRange.Low <= start && tyRange.High >= start);
                        int clampEnd = Math.Min(tyRange.High, end);
                        int repLen = clampEnd - start + 1;
                        if (repLen >= MIN_RUN_LENGTH) {
                            bool isAscii =
                                TextUtil.IsPrintableAscii((char)(mFileData[start] & 0x7f));

                            // IF the run isn't ASCII, OR it's so long that we don't want to
                            // encode it as a string, OR it's so short that we don't want to
                            // treat it as a string, THEN output it as a run.  Otherwise, just
                            // let the ASCII-catcher handle it later.
                            if (!isAscii ||
                                    repLen > MIN_RUN_LENGTH_ASCII || repLen < minStringChars) {
                                LogV(start, "Run of 0x" + mFileData[start].ToString("x2") + ": " +
                                    repLen + " bytes");
                                mAnattribs[start].DataDescriptor = FormatDescriptor.Create(
                                    repLen, FormatDescriptor.Type.Fill,
                                    FormatDescriptor.SubType.None);
                                start += repLen;
                                continue;
                            }
                        }
                        // We didn't like this range.  We probably won't like it for any other
                        // point within the range, so start again past it.  Ideally we'd use
                        // Range.Low of the range that followed the one that was returned, but
                        // we don't have that handy.
                        minNextStart = Math.Min(minNextStart, tyRange.High + 1);
                    } else {
                        // no match; try to advance to the start of the next range.
                        Debug.Assert(tyRange.Low > start);
                        minNextStart = Math.Min(minNextStart, tyRange.Low);
                    }
                }

                found = mProject.StdAsciiBytes.GetContainingOrSubsequentRange(start,
                        out RangeSet.Range range);
                if (found) {
                    if (range.Low <= start) {
                        // found a matching range
                        Debug.Assert(range.Low <= start && range.High >= start);
                        int clampEnd = Math.Min(range.High, end);
                        int repLen = clampEnd - start + 1;
                        if (repLen >= minStringChars) {
                            LogV(start, "Std ASCII string, len=" + repLen + " bytes");
                            mAnattribs[start].DataDescriptor = FormatDescriptor.Create(repLen,
                                FormatDescriptor.Type.String, FormatDescriptor.SubType.None);
                            start += repLen;
                            continue;
                        }

                        minNextStart = Math.Min(minNextStart, range.High + 1);
                    } else {
                        Debug.Assert(range.Low > start);
                        minNextStart = Math.Min(minNextStart, range.Low);
                    }
                }

                found = mProject.HighAsciiBytes.GetContainingOrSubsequentRange(start,
                        out range);
                if (found) {
                    if (range.Low <= start) {
                        // found a matching range
                        Debug.Assert(range.Low <= start && range.High >= start);
                        int clampEnd = Math.Min(range.High, end);
                        int repLen = clampEnd - start + 1;
                        if (repLen >= minStringChars) {
                            LogV(start, "High ASCII string, len=" + repLen + " bytes");
                            mAnattribs[start].DataDescriptor = FormatDescriptor.Create(repLen,
                                FormatDescriptor.Type.String, FormatDescriptor.SubType.None);
                            start += repLen;
                            continue;
                        }

                        minNextStart = Math.Min(minNextStart, range.High + 1);
                    } else {
                        Debug.Assert(range.Low > start);
                        minNextStart = Math.Min(minNextStart, range.Low);
                    }
                }

                // Advance to the next possible run location.
                int nextStart = minNextStart > 0 ? minNextStart : start + 1;
                Debug.Assert(nextStart > start);

                // No runs found, output as single bytes.  This is the easiest form for users
                // to edit.
                while (start < nextStart) {
                    mAnattribs[start].DataDescriptor = oneByteDefault;
                    FormatDescriptor.DebugPrefabBump();
                    start++;
                }
            }
#else
            // Select "is printable" test.  We use the extended version to include some
            // control characters.
            // TODO(maybe): require some *actually* printable characters in each string
            CharEncoding.InclusionTest testPrintable;
            FormatDescriptor.SubType baseSubType;
            switch (mAnalysisParams.DefaultTextScanMode) {
                case TextScanMode.LowAscii:
                    testPrintable = CharEncoding.IsExtendedAscii;
                    baseSubType = FormatDescriptor.SubType.Ascii;
                    break;
                case TextScanMode.LowHighAscii:
                    // The generic sub-type is a placeholder; it is refined to Ascii or
                    // HighAscii per string in the loop below.
                    testPrintable = CharEncoding.IsExtendedLowOrHighAscii;
                    baseSubType = FormatDescriptor.SubType.ASCII_GENERIC;
                    break;
                case TextScanMode.C64Petscii:
                    testPrintable = CharEncoding.IsExtendedC64Petscii;
                    baseSubType = FormatDescriptor.SubType.C64Petscii;
                    break;
                case TextScanMode.C64ScreenCode:
                    testPrintable = CharEncoding.IsExtendedC64ScreenCode;
                    baseSubType = FormatDescriptor.SubType.C64Screen;
                    break;
                default:
                    // Unknown scan mode; complain in debug builds, then fall back to the
                    // same settings as LowHighAscii.
                    Debug.Assert(false);
                    testPrintable = CharEncoding.IsExtendedLowOrHighAscii;
                    baseSubType = FormatDescriptor.SubType.ASCII_GENERIC;
                    break;
            }

            // Each pass through the loop classifies one or more bytes at "start" and
            // advances "start" past them.
            while (start <= end) {
                // Check for block of repeated values.
                int runLen = RecognizeRun(mFileData, start, end);
                // Length of the candidate string at "start"; stays zero if the first byte
                // isn't printable or the leading run is too long to treat as text.
                int printLen = 0;
                FormatDescriptor.SubType subType = baseSubType;

                if (testPrintable(mFileData[start])) {
                    // The run byte is printable.  If the run is no longer than a line,
                    // it's possible the run is followed by additional printable
                    // characters, e.g. "*****hello".  Text is easier for humans to
                    // understand, so we prefer that unless the run is longer than one line.
                    if (runLen <= MAX_STRING_RUN_LENGTH) {
                        // See if the run is followed by additional printable characters.
                        printLen = runLen;

                        // For LowHighAscii we allow a string to be either low or high, but it
                        // must be entirely one thing.  Refine our test.
                        CharEncoding.InclusionTest refinedTest = testPrintable;
                        if (mAnalysisParams.DefaultTextScanMode == TextScanMode.LowHighAscii) {
                            if (CharEncoding.IsExtendedAscii(mFileData[start])) {
                                refinedTest = CharEncoding.IsExtendedAscii;
                                subType = FormatDescriptor.SubType.Ascii;
                            } else {
                                refinedTest = CharEncoding.IsExtendedHighAscii;
                                subType = FormatDescriptor.SubType.HighAscii;
                            }
                        }
                        // Extend the string past the run until a non-matching byte or the
                        // end of the range.
                        for (int i = start + runLen; i <= end; i++) {
                            if (!refinedTest(mFileData[i])) {
                                break;
                            }
                            printLen++;
                        }
                    }
                }

                if (printLen >= minStringChars) {
                    // This is either a short run followed by printable characters, or just
                    // a (possibly very large) bunch of printable characters.
                    Debug.Assert(subType != FormatDescriptor.SubType.ASCII_GENERIC);
                    LogD(start, "Character string (" + subType + "), len=" + printLen + " bytes");
                    mAnattribs[start].DataDescriptor = FormatDescriptor.Create(printLen,
                        FormatDescriptor.Type.StringGeneric, subType);
                    start += printLen;
                } else if (runLen >= MIN_RUN_LENGTH) {
                    // Didn't qualify as a string, but it's long enough to be a run.
                    //
                    // TODO(someday): allow .fill pseudo-ops to have character encoding
                    //   sub-types, so we can ".fill 64,'*'".  Easy to do here, but
                    //   proper treatment requires tweaking data operand editor to allow
                    //   char encoding to be specified.
                    LogV(start, "Run of 0x" + mFileData[start].ToString("x2") + ": " +
                        runLen + " bytes");
                    mAnattribs[start].DataDescriptor = FormatDescriptor.Create(
                        runLen, FormatDescriptor.Type.Fill,
                        FormatDescriptor.SubType.None);
                    start += runLen;
                } else {
                    // Nothing useful found, output 1+ values as single bytes.  This is the
                    // easiest form for users to edit.  If we found a run, but it was too short,
                    // we can go ahead and mark all bytes in the run because we know the later
                    // matches will also be too short.
                    Debug.Assert(runLen > 0);
                    while (runLen-- != 0) {
                        mAnattribs[start++].DataDescriptor = oneByteDefault;
                        FormatDescriptor.DebugPrefabBump();
                    }
                }
            }
#endif
        }

        #region Static analyzer methods

        /// <summary>
        /// Measures how many consecutive bytes, beginning at <paramref name="start"/>,
        /// hold the same value as the byte at <paramref name="start"/>.  The scan never
        /// looks past <paramref name="end"/>.
        /// </summary>
        /// <param name="fileData">Raw data.</param>
        /// <param name="start">Offset of first byte in range.</param>
        /// <param name="end">Offset of last byte in range.</param>
        /// <returns>Length of run.  The byte at <paramref name="start"/> always counts,
        ///   so the result is at least 1.</returns>
        public static int RecognizeRun(byte[] fileData, int start, int end) {
            byte runValue = fileData[start];
            int count = 1;
            for (int i = start + 1; i <= end && fileData[i] == runValue; i++) {
                count++;
            }
            return count;
        }

        /// <summary>
        /// Tallies the bytes in a region into three buckets: characters with the high bit
        /// clear, characters with the high bit set, and bytes rejected by the character
        /// test.
        /// </summary>
        /// <param name="fileData">Raw data.</param>
        /// <param name="start">Offset of first byte in range.</param>
        /// <param name="end">Offset of last byte in range.</param>
        /// <param name="charTest">Character test delegate.  Must match on both high and
        ///   low characters.</param>
        /// <param name="lowVal">Set to the number of low-range characters found.</param>
        /// <param name="highVal">Set to the number of high-range characters found.</param>
        /// <param name="nonChar">Set to the number of non-character bytes found.</param>
        public static void CountHighLowBytes(byte[] fileData, int start, int end,
                CharEncoding.InclusionTest charTest,
                out int lowVal, out int highVal, out int nonChar) {
            lowVal = 0;
            highVal = 0;
            nonChar = 0;

            int offset = start;
            while (offset <= end) {
                byte cur = fileData[offset++];
                if (charTest(cur)) {
                    // Accepted character; bucket it by the state of bit 7.
                    if (cur < 0x80) {
                        lowVal++;
                    } else {
                        highVal++;
                    }
                } else {
                    nonChar++;
                }
            }
        }

        /// <summary>
        /// Reports how many bytes in the region are accepted by the character test.
        /// </summary>
        /// <param name="fileData">Raw data.</param>
        /// <param name="start">Offset of first byte in range.</param>
        /// <param name="end">Offset of last byte in range.</param>
        /// <param name="charTest">Character test delegate.</param>
        /// <returns>Number of matching characters.</returns>
        public static int CountCharacterBytes(byte[] fileData, int start, int end,
                CharEncoding.InclusionTest charTest) {
            int matches = 0;
            int offset = start;
            while (offset <= end) {
                if (charTest(fileData[offset])) {
                    matches++;
                }
                offset++;
            }
            return matches;
        }

        /// <summary>
        /// Treats the region as a sequence of strings that each end with a 0x00 byte,
        /// and counts them.
        ///
        /// Empty strings (a terminator with nothing before it) are accepted but do not
        /// add to the count.  The region must end with a terminator.
        /// </summary>
        /// <remarks>
        /// The character test is accepted but not currently consulted: per-character
        /// validation is disabled, so any nonzero byte is treated as string content.
        /// </remarks>
        /// <param name="fileData">Raw data.</param>
        /// <param name="start">Offset of first byte in range.</param>
        /// <param name="end">Offset of last byte in range.</param>
        /// <param name="charTest">Character test delegate.</param>
        /// <param name="limitHiBit">If set, the high bit in all character must be the
        ///   same.  Used to enforce a single encoding when "low or high ASCII" is used.</param>
        /// <returns>Number of strings found, or -1 if bad data identified.</returns>
        public static int RecognizeNullTerminatedStrings(byte[] fileData, int start, int end,
                CharEncoding.InclusionTest charTest, bool limitHiBit) {
            // Cheap rejection: the final byte has to be a terminator.
            if (fileData[end] != 0x00) {
                return -1;
            }

            const int NO_EXPECTATION = -1;
            int found = 0;
            int curLen = 0;                     // characters seen in the current string
            int wantHiBit = NO_EXPECTATION;     // high bit required for the current string

            int offset = start;
            while (offset <= end) {
                int cur = fileData[offset++];
                if (cur != 0x00) {
                    if (limitHiBit) {
                        int hiBit = cur & 0x80;
                        if (wantHiBit == NO_EXPECTATION) {
                            // First character of the string decides high vs. low.
                            wantHiBit = hiBit;
                        } else if (hiBit != wantHiBit) {
                            // High and low characters mixed within one string.
                            return -1;
                        }
                    }
                    curLen++;
                    continue;
                }

                // Hit a terminator.  Zero-length strings don't count.
                if (curLen != 0) {
                    found++;
                }
                curLen = 0;
                wantHiBit = NO_EXPECTATION;
            }

            return found;
        }

        /// <summary>
        /// Treats the region as a sequence of strings that each begin with a one-byte
        /// length, and counts them.
        ///
        /// A length byte of zero is accepted but does not add to the count.
        /// </summary>
        /// <remarks>
        /// The character test is accepted but not currently consulted: per-character
        /// validation is disabled, so only the structure and (optionally) the high bits
        /// are checked.
        /// </remarks>
        /// <param name="fileData">Raw data.</param>
        /// <param name="start">Offset of first byte in range.</param>
        /// <param name="end">Offset of last byte in range.</param>
        /// <param name="charTest">Character test delegate.</param>
        /// <param name="limitHiBit">If set, the high bit in all character must be the
        ///   same.  Used to enforce a single encoding when "low or high ASCII" is used.</param>
        /// <returns>Number of strings found, or -1 if bad data identified.</returns>
        public static int RecognizeLen8Strings(byte[] fileData, int start, int end,
                CharEncoding.InclusionTest charTest, bool limitHiBit) {
            int cursor = start;
            int bytesLeft = end - start + 1;
            int found = 0;

            while (bytesLeft > 0) {
                // Consume the length prefix.
                int len = fileData[cursor++];
                bytesLeft--;
                if (len > bytesLeft) {
                    // The string would run past the end of the region.
                    return -1;
                }
                if (len == 0) {
                    continue;
                }

                found++;
                bytesLeft -= len;

                // The first character sets the high-bit expectation for the string.
                int wantHiBit = fileData[cursor] & 0x80;
                for (int strEnd = cursor + len; cursor < strEnd; cursor++) {
                    int cur = fileData[cursor];
                    if (limitHiBit && (cur & 0x80) != wantHiBit) {
                        // High and low characters mixed within one string.
                        return -1;
                    }
                }
            }

            return found;
        }

        /// <summary>
        /// Treats the region as a sequence of strings that each begin with a two-byte
        /// length (low byte first), and counts them.
        ///
        /// A length of zero is accepted but does not add to the count.
        /// </summary>
        /// <remarks>
        /// The character test is accepted but not currently consulted: per-character
        /// validation is disabled, so only the structure and (optionally) the high bits
        /// are checked.
        /// </remarks>
        /// <param name="fileData">Raw data.</param>
        /// <param name="start">Offset of first byte in range.</param>
        /// <param name="end">Offset of last byte in range.</param>
        /// <param name="charTest">Character test delegate.</param>
        /// <param name="limitHiBit">If set, the high bit in all character must be the
        ///   same.  Used to enforce a single encoding when "low or high ASCII" is used.</param>
        /// <returns>Number of strings found, or -1 if bad data identified.</returns>
        public static int RecognizeLen16Strings(byte[] fileData, int start, int end,
                CharEncoding.InclusionTest charTest, bool limitHiBit) {
            int cursor = start;
            int bytesLeft = end - start + 1;
            int found = 0;

            while (bytesLeft > 0) {
                if (bytesLeft < 2) {
                    // A lone trailing byte can't hold a 16-bit length.
                    return -1;
                }

                // Consume the length prefix, low byte then high byte.
                int len = fileData[cursor] | (fileData[cursor + 1] << 8);
                cursor += 2;
                bytesLeft -= 2;
                if (len > bytesLeft) {
                    // The string would run past the end of the region.
                    return -1;
                }
                if (len == 0) {
                    continue;
                }

                found++;
                bytesLeft -= len;

                // The first character sets the high-bit expectation for the string.
                int wantHiBit = fileData[cursor] & 0x80;
                for (int strEnd = cursor + len; cursor < strEnd; cursor++) {
                    int cur = fileData[cursor];
                    if (limitHiBit && (cur & 0x80) != wantHiBit) {
                        // High and low characters mixed within one string.
                        return -1;
                    }
                }
            }

            return found;
        }

        /// <summary>
        /// Counts strings in Dextral Character Inverted format, where the final byte of
        /// each string has its high bit flipped relative to the bytes before it.
        ///
        /// The high bit of the region's last byte defines the terminator polarity, and
        /// every string in the region must follow that same hi/lo pattern.  This cuts
        /// down on false positives.
        ///
        /// Single-character strings are allowed for C64 PETSCII, which doesn't have an
        /// equivalent to "high ASCII" character formatting, so long as the terminating
        /// character value has its high bit set.  Without this restriction, any collection
        /// of characters is just a list of DCI strings, which is a weird thing to offer up
        /// in the UI.
        /// </summary>
        /// <remarks>
        /// For C64Petscii, this will identify strings that are entirely in lower case except
        /// for the last letteR, or vice-versa.
        ///
        /// The character test is not applied to the data itself; it is only probed once
        /// with the value 0x5c (see the note in the body).
        /// </remarks>
        /// <param name="fileData">Raw data.</param>
        /// <param name="start">Offset of first byte in range.</param>
        /// <param name="end">Offset of last byte in range.</param>
        /// <param name="charTest">Character test delegate.</param>
        /// <returns>Number of strings found, or -1 if bad data identified.</returns>
        public static int RecognizeDciStrings(byte[] fileData, int start, int end,
                CharEncoding.InclusionTest charTest) {
            int endHiBit = fileData[end] & 0x80;
            int regionLen = end - start + 1;
            int found = 0;
            int pendingChars = 0;   // non-terminator bytes seen since the last terminator

            for (int offset = start; offset <= end; offset++) {
                bool isTerminator = (fileData[offset] & 0x80) == endHiBit;
                if (!isTerminator) {
                    pendingChars++;
                    continue;
                }

                // A terminator with nothing in front of it is a one-character string.
                // That's only acceptable when terminators have the high bit set;
                // otherwise this is just a sequence of ordinary characters.
                if (pendingChars == 0 && endHiBit == 0) {
                    return -1;
                }
                found++;
                pendingChars = 0;
            }

            // Temporary hack carried over from the original: the result of probing the
            // test with 0x5c is used as a stand-in for "this is an ASCII / high ASCII
            // test".  For those, reject a region made up entirely of one-character strings.
            bool asciiFamily = charTest(0x5c);
            if (asciiFamily && found == regionLen) {
                return -1;
            }
            return found;
        }

#if false
        /// <summary>
        /// Counts strings stored in reverse DCI (Dextral Character Inverted) format.  The
        /// characters of each string appear in reverse order in memory, and the byte at the
        /// lowest address of each string has its high bit flipped relative to the others.
        ///
        /// A string shorter than two bytes is rejected, and every string in the range must
        /// use the same hi/lo pattern, which helps reduce false-positives.
        /// </summary>
        /// <param name="fileData">Raw data.</param>
        /// <param name="start">Offset of first byte in range.</param>
        /// <param name="end">Offset of last byte in range.</param>
        /// <returns>Number of strings found, or -1 if the data doesn't fit the format.</returns>
        public static int RecognizeReverseDciStrings(byte[] fileData, int start, int end) {
            // High-bit state shared by every byte except the marker byte of each string.
            int bodyHiBit = fileData[end] & 0x80;

            // The lowest byte in the range has to be a marker, so it can't have the
            // same polarity as the body bytes.
            if ((fileData[start] & 0x80) == bodyHiBit) {
                return -1;
            }

            int found = 0;
            // True while no body bytes have been seen for the string being scanned.
            bool bodyEmpty = true;

            // Walk from the highest offset down, which is the strings' natural order.
            int pos = end;
            while (pos >= start) {
                int raw = fileData[pos];
                pos--;

                if ((raw & 0x80) == bodyHiBit) {
                    bodyEmpty = false;
                } else if (bodyEmpty) {
                    // Marker byte with no body in front of it... fail.
                    return -1;
                } else {
                    found++;
                    bodyEmpty = true;
                }

                // With the high bit stripped, only printable ASCII is acceptable.
                int low7 = raw & 0x7f;
                bool printable = low7 >= 0x20 && low7 != 0x7f;
                if (!printable) {
                    return -1;
                }
            }

            return found;
        }
#endif

        /// <summary>
        /// Checks that the bytes of a string have the layout required by the string type.
        /// Only the structure (length prefix, terminator, high-bit pattern) is examined;
        /// the character encoding is not validated.
        /// </summary>
        /// <param name="fileData">Raw data.</param>
        /// <param name="offset">Offset of the first byte of the string.</param>
        /// <param name="length">Total length of the string in bytes, counting any leading
        ///   length bytes and terminating null byte.</param>
        /// <param name="type">String type the data is expected to match.</param>
        /// <param name="failMsg">Receives a detailed failure message; left empty when
        ///   no message is set.</param>
        /// <returns>True if the data matches the expected structure.</returns>
        public static bool VerifyStringData(byte[] fileData, int offset, int length,
                FormatDescriptor.Type type, out string failMsg) {
            failMsg = string.Empty;

            switch (type) {
                case FormatDescriptor.Type.StringGeneric:
                case FormatDescriptor.Type.StringReverse:
                    // No structural requirements for these.
                    return true;

                case FormatDescriptor.Type.StringNullTerm: {
                    // The first null byte encountered must also be the final byte.
                    for (int pos = offset, remaining = length; remaining != 0;
                            pos++, remaining--) {
                        if (fileData[pos] != 0x00) {
                            continue;
                        }
                        if (remaining == 1) {
                            return true;
                        }
                        failMsg = Res.Strings.STR_VFY_NULL_INSIDE_NULL_TERM;
                        return false;
                    }
                    // Ran out of bytes without seeing a null.
                    failMsg = Res.Strings.STR_VFY_MISSING_NULL_TERM;
                    return false;
                }

                case FormatDescriptor.Type.StringL8: {
                    // One-byte length prefix must equal the number of bytes that follow.
                    if (fileData[offset] == length - 1) {
                        return true;
                    }
                    failMsg = Res.Strings.STR_VFY_L1_LENGTH_MISMATCH;
                    return false;
                }

                case FormatDescriptor.Type.StringL16: {
                    // Two-byte length prefix must equal the number of bytes that follow.
                    int prefixValue = RawData.GetWord(fileData, offset, 2, false);
                    if (prefixValue == length - 2) {
                        return true;
                    }
                    failMsg = Res.Strings.STR_VFY_L2_LENGTH_MISMATCH;
                    return false;
                }

                case FormatDescriptor.Type.StringDci: {
                    // Every byte before the last must share the first byte's high bit,
                    // and the last byte must have the opposite.  A single-byte string
                    // is accepted as-is; no minimum length is enforced.
                    int bodyHiBit = fileData[offset] & 0x80;
                    int lastIndex = offset + length - 1;
                    for (int idx = offset + 1; idx < lastIndex; idx++) {
                        if ((fileData[idx] & 0x80) != bodyHiBit) {
                            failMsg = Res.Strings.STR_VFY_DCI_MIXED_DATA;
                            return false;
                        }
                    }
                    if (length > 1 && (fileData[lastIndex] & 0x80) == bodyHiBit) {
                        failMsg = Res.Strings.STR_VFY_DCI_NOT_TERMINATED;
                        return false;
                    }
                    return true;
                }

                default:
                    // Not a string type this method knows how to check.
                    Debug.Assert(false);
                    return false;
            }
        }

        #endregion // Static analyzers
    }
}


#if DATA_PRESCAN
        /// <summary>
        /// Enumerator that yields the offsets not known to hold code or data: bytes that
        /// belong to an instruction are skipped, and only offsets flagged as uncategorized
        /// are returned.
        ///
        /// Offsets are produced as integers in ascending order.
        /// </summary>
        private class UndeterminedValueIterator : IEnumerator {
            /// <summary>
            /// Anattrib array being walked.
            /// </summary>
            private Anattrib[] mAnattribs;

            /// <summary>
            /// Offset of the current item; -1 until the first call to MoveNext().
            /// </summary>
            private int mCurIndex;


            /// <summary>
            /// Constructor.  The enumerator starts out positioned before the first item.
            /// </summary>
            public UndeterminedValueIterator(Anattrib[] anattribs) {
                mAnattribs = anattribs;
                mCurIndex = -1;
            }

            // IEnumerator: current element (boxed offset), or null if not started yet
            public object Current {
                get {
                    return (mCurIndex < 0) ? null : (object)mCurIndex;
                }
            }

            // IEnumerator: advance to the next uncategorized offset; false if none remain
            public bool MoveNext() {
                for (mCurIndex++; mCurIndex < mAnattribs.Length; mCurIndex++) {
                    Anattrib attr = mAnattribs[mCurIndex];
                    if (attr.IsInstructionStart) {
                        // Jump to the instruction's last byte; the loop increment then
                        // moves us past it.
                        mCurIndex += attr.Length - 1;
                        continue;
                    }
                    if (attr.IsUncategorized) {
                        // got one
                        return true;
                    }
                }

                return false;
            }

            // IEnumerator: return to the not-started state
            public void Reset() {
                mCurIndex = -1;
            }
        }
#endif
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								/*
 								 * Copyright 2019 faddenSoft
 								 *
 								 * Licensed under the Apache License, Version 2.0 (the "License");
 								 * you may not use this file except in compliance with the License.
 								 * You may obtain a copy of the License at
 								 *
 								 *     http://www.apache.org/licenses/LICENSE-2.0
 								 *
 								 * Unless required by applicable law or agreed to in writing, software
 								 * distributed under the License is distributed on an "AS IS" BASIS,
 								 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								 * See the License for the specific language governing permissions and
 								 * limitations under the License.
 								 */
 								using System;
 								using System.Diagnostics;
 								using Asm65;
 								using CommonUtil;
-												Add multiple encoding support to uncategorized data analyzer

The code that searches for character strings in uncategorized data
now recognizes the C64 encodings when selected in the project
properties.

The new code avoids some redundant comparisons when runs of
printable characters are found.  I suspect the new implementation
loses on overall performance because we're now calling through
delegates instead of testing characters directly, but I haven't
tested for that.

											
										
										
											2019-08-13 21:08:27 +00:00
+								using TextScanMode = SourceGen.ProjectProperties.AnalysisParameters.TextScanMode;
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
-												Move WPF code from SourceGenWPF to SourceGen

											
										
										
											2019-07-20 20:28:10 +00:00
+								namespace SourceGen {
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								    /// <summary>
 								    /// Auto-detection of structured data.
 								    ///
 								    /// This class doesn't really hold any state.  It's just a convenient place to collect
 								    /// the items needed by the analyzer methods.
 								    /// </summary>
 								    public class DataAnalysis {
 								        // Minimum number of consecutive identical bytes for something to be called a "run".
 								        private const int MIN_RUN_LENGTH = 5;
-												Add multiple encoding support to uncategorized data analyzer

The code that searches for character strings in uncategorized data
now recognizes the C64 encodings when selected in the project
properties.

The new code avoids some redundant comparisons when runs of
printable characters are found.  I suspect the new implementation
loses on overall performance because we're now calling through
delegates instead of testing characters directly, but I haven't
tested for that.

											
										
										
											2019-08-13 21:08:27 +00:00
+								        // Minimum length for treating data as a run if the byte is a printable character.
 								        // (Alternatively, the maximum length of a character string composed of a single value.)
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								        // Anything shorter than this is handled with a string directive, anything this long or
 								        // longer becomes FILL.  This should be larger than the MinCharsForString parameter.
-												Add multiple encoding support to uncategorized data analyzer

The code that searches for character strings in uncategorized data
now recognizes the C64 encodings when selected in the project
properties.

The new code avoids some redundant comparisons when runs of
printable characters are found.  I suspect the new implementation
loses on overall performance because we're now calling through
delegates instead of testing characters directly, but I haven't
tested for that.

											
										
										
											2019-08-13 21:08:27 +00:00
+								        private const int MAX_STRING_RUN_LENGTH = 62;
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
 								        // Absolute minimum string length for auto-detection.  This is used when generating the
 								        // data tables.
 								        public const int MIN_STRING_LENGTH = 3;
 								        // Minimum length for an ASCII string.  Anything shorter is just output as bytes.
 								        // This is the default value; the actual value is configured as a project preference.
 								        public const int DEFAULT_MIN_STRING_LENGTH = 4;
 								        // Set min chars to this to disable string detection.
 								        public const int MIN_CHARS_FOR_STRING_DISABLED = int.MaxValue;
 								        /// <summary>
 								        /// Project with which we are associated.
 								        /// </summary>
 								        private DisasmProject mProject;
 								        /// <summary>
 								        /// Reference to 65xx data.
 								        /// </summary>
 								        private byte[] mFileData;
 								        /// <summary>
 								        /// Attributes, one per byte in input file.
 								        /// </summary>
 								        private Anattrib[] mAnattribs;
 								        /// <summary>
 								        /// Configurable parameters.
 								        /// </summary>
 								        private ProjectProperties.AnalysisParameters mAnalysisParams;
 								        /// <summary>
 								        /// Debug trace log.
 								        /// </summary>
 								        private DebugLog mDebugLog = new DebugLog(DebugLog.Priority.Silent);
 								        public DebugLog DebugLog {
 								            set {
 								                mDebugLog = value;
 								            }
 								        }
-												Use relocation data to format instruction operands

This was a relatively lightweight change to confirm the usefulness
of relocation data.  The results were very positive.

The relatively superficial integration of the data into the data
analysis process causes some problems, e.g. the cross-reference table
entries show an offset because the code analyzer's computed operand
offset doesn't match the value of the label.  The feature should be
considered experimental.

The feature can be enabled or disabled with a project property.  The
results were sufficiently useful and non-annoying to make the setting
enabled by default.

											
										
										
											2020-07-04 00:37:04 +00:00
+								        /// <summary>
 								        /// Constructor.
 								        /// </summary>
 								        /// <param name="proj">Project to analyze.</param>
 								        /// <param name="anattribs">Anattrib array.</param>
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								        public DataAnalysis(DisasmProject proj, Anattrib[] anattribs) {
 								            mProject = proj;
 								            mAnattribs = anattribs;
 								            mFileData = proj.FileData;
 								            mAnalysisParams = proj.ProjectProps.AnalysisParams;
 								        }
 								        // Internal log functions. If we're concerned about performance overhead due to
 								        // call-site string concatenation, we can #ifdef these to nothing in release builds,
 								        // which should allow the compiler to elide the concat.
 								#if false
 								        private void LogV(int offset, string msg) {
 								            if (mDebugLog.IsLoggable(DebugLog.Priority.Verbose)) {
 								                mDebugLog.LogV("+" + offset.ToString("x6") + " " + msg);
 								            }
 								        }
 								#else
 								        private void LogV(int offset, string msg) { }
 								#endif
 								        private void LogD(int offset, string msg) {
 								            if (mDebugLog.IsLoggable(DebugLog.Priority.Debug)) {
 								                mDebugLog.LogD("+" + offset.ToString("x6") + " " + msg);
 								            }
 								        }
 								        private void LogI(int offset, string msg) {
 								            if (mDebugLog.IsLoggable(DebugLog.Priority.Info)) {
 								                mDebugLog.LogI("+" + offset.ToString("x6") + " " + msg);
 								            }
 								        }
 								        private void LogW(int offset, string msg) {
 								            if (mDebugLog.IsLoggable(DebugLog.Priority.Warning)) {
 								                mDebugLog.LogW("+" + offset.ToString("x6") + " " + msg);
 								            }
 								        }
 								        private void LogE(int offset, string msg) {
 								            if (mDebugLog.IsLoggable(DebugLog.Priority.Error)) {
 								                mDebugLog.LogE("+" + offset.ToString("x6") + " " + msg);
 								            }
 								        }
 								        /// <summary>
 								        /// Analyzes instruction operands and Address data descriptors to identify references
 								        /// to offsets within the file.
 								        ///
 								        /// Instructions with format descriptors are left alone.  Instructions with
 								        /// operand offsets but no descriptor will have a descriptor generated
 								        /// using the label at the target offset; if the target offset is unlabeled,
 								        /// a unique label will be generated.  Data descriptors with type=Address are
 								        /// handled the same way.
 								        ///
 								        /// In some cases, such as a reference to the middle of an instruction, we will
 								        /// label a nearby location instead.
 								        ///
 								        /// This should be called after code analysis has run, user labels and format
 								        /// descriptors have been applied, and platform/project symbols have been merged
 								        /// into the symbol table.
 								        /// </summary>
 								        /// <returns>True on success.</returns>
 								        public void AnalyzeDataTargets() {
 								            mDebugLog.LogI("Analyzing data targets...");
 								            for (int offset = 0; offset < mAnattribs.Length; offset++) {
 								                Anattrib attr = mAnattribs[offset];
 								                if (attr.IsInstructionStart) {
 								                    if (attr.DataDescriptor != null) {
 								                        // It's being shown as numeric, or as a reference to some other symbol.
 								                        // Either way there's nothing further for us to do.  (Technically we
 								                        // would want to treat it like the no-descriptor case if the type was
 								                        // numeric/Address, but we don't allow that for instructions.)
-												Various tweaks

Fixed a minor bug in GenerateLineList that would cause a blank line
to disappear under certain circumstances.  Harmless, but odd.

Added a width property to DefSymbol.

Updated comments.

											
										
										
											2019-08-25 00:35:26 +00:00
+								                        //
 								                        // Project and platform symbols are applied later.
-												Remove a couple of faulty assertions

One asserted unnecessarily, one should have been an if/then.  Both
were concerned with instruction operands being formatted with
type "address".

											
										
										
											2021-08-11 23:25:24 +00:00
 								                        // (This assert is bogus -- this is possible with a bad bit of formatting.
 								                        // One way this can occur semi-naturally is to follow a JSR with a 16-bit
 								                        // value that evaluates to a pair of "illegal" instructions, which are
 								                        // then formatted by the user as a 16-bit address without tagging as
 								                        // inline data.  Enabling undocumented 6502 instructions throws it off.)
 								                        //Debug.Assert(attr.DataDescriptor.FormatSubType !=
 								                        //    FormatDescriptor.SubType.Address);
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                        continue;
 								                    }
-												Use relocation data to format instruction operands

This was a relatively lightweight change to confirm the usefulness
of relocation data.  The results were very positive.

The relatively superficial integration of the data into the data
analysis process causes some problems, e.g. the cross-reference table
entries show an offset because the code analyzer's computed operand
offset doesn't match the value of the label.  The feature should be
considered experimental.

The feature can be enabled or disabled with a project property.  The
results were sufficiently useful and non-annoying to make the setting
enabled by default.

											
										
										
											2020-07-04 00:37:04 +00:00
 								                    // Check for a relocation.  It'll be at offset+1 because it's on the operand,
-												Apply relocation data to unformatted data

Works well for things like jump tables.  Seeing a bunch of these
scattered in a chunk of data is a decent signal that it's actually
code.

In a bold move, we now exclude PEA operands from auto-label gen when
they don't have relocation data.  This is very useful for things
like Int2Hex for which constants are typically pushed with PEA.

Reworked the "use reloc data" setting so it defaults to false and is
explicitly set to true when converting OMF.  This provides a minor
optimization since we now check the boolean and skip doing a lookup
in an empty table.

											
										
										
											2020-07-04 05:03:50 +00:00
+								                    // not the opcode byte.  (Make sure to check the length, or an RTS followed
 								                    // by relocated data will freak out.)
-												Fix application of reloc info in data areas

The test wasn't correctly excluding instructions, so it was possible
to create a situation where a two-byte data item had an instruction
starting in the second byte.

We also weren't checking the length of the instruction to ensure that
it was wider than the reloc data.  This could get weird for an
immediate constant when the M/X flags are wrong.  When in doubt, don't
overwrite.

											
										
										
											2020-07-07 00:10:04 +00:00
+								                    //
 								                    // We don't check for embedded instructions here.  If that did somehow happen,
 								                    // it's probably intentional, so we should do the replacement.
 								                    //
-												Add 20212-reloc-data test

This test exercises the relocation data feature.  The test file is
generated from a multi-segment OMF file that was hex-edited to have
specific attributes (see 20212-reloc-data-lnk.S for instructions).
The test also serves as a way to exercise the OMF converter.

Also, implement the Bank Relative flag.

											
										
										
											2020-07-06 00:17:44 +00:00
+								                    // TODO(someday): this won't get the second byte of an MVN/MVP, which is fine
 								                    // since we don't currently support two formats on one instruction.
-												Apply relocation data to unformatted data

Works well for things like jump tables.  Seeing a bunch of these
scattered in a chunk of data is a decent signal that it's actually
code.

In a bold move, we now exclude PEA operands from auto-label gen when
they don't have relocation data.  This is very useful for things
like Int2Hex for which constants are typically pushed with PEA.

Reworked the "use reloc data" setting so it defaults to false and is
explicitly set to true when converting OMF.  This provides a minor
optimization since we now check the boolean and skip doing a lookup
in an empty table.

											
										
										
											2020-07-04 05:03:50 +00:00
+								                    if (mAnalysisParams.UseRelocData) {
 								                        if (attr.Length > 1 && mProject.RelocList.TryGetValue(offset + 1,
-												Fix application of reloc info in data areas

The test wasn't correctly excluding instructions, so it was possible
to create a situation where a two-byte data item had an instruction
starting in the second byte.

We also weren't checking the length of the instruction to ensure that
it was wider than the reloc data.  This could get weird for an
immediate constant when the M/X flags are wrong.  When in doubt, don't
overwrite.

											
										
										
											2020-07-07 00:10:04 +00:00
+								                                    out DisasmProject.RelocData reloc) &&
 								                                attr.Length > reloc.Width) {
-												Use relocation data to format instruction operands

This was a relatively lightweight change to confirm the usefulness
of relocation data.  The results were very positive.

The relatively superficial integration of the data into the data
analysis process causes some problems, e.g. the cross-reference table
entries show an offset because the code analyzer's computed operand
offset doesn't match the value of the label.  The feature should be
considered experimental

The feature can be enabled or disabled with a project property.  The
results were sufficiently useful and non-annoying to make the setting
enabled by default.

											
										
										
											2020-07-04 00:37:04 +00:00
+								                            // The relocation address differs from what the analyzer came up
 								                            // with.  This may be because of incorrect assumptions about the
 								                            // bank (assuming B==K) or because the partial address refers to
 								                            // a location outside the file bounds.  Whatever the case, if the
 								                            // address is different, attr.OperandOffset will also be different.
 								                            int relOperandOffset = mProject.AddrMap.AddressToOffset(offset,
 								                                reloc.Value);
-												Apply relocation data to unformatted data

Works well for things like jump tables.  Seeing a bunch of these
scattered in a chunk of data is a decent signal that it's actually
code.

In a bold move, we now exclude PEA operands from auto-label gen when
they don't have relocation data.  This is very useful for things
like Int2Hex for which constants are typically pushed with PEA.

Reworked the "use reloc data" setting so it defaults to false and is
explicitly set to true when converting OMF.  This provides a minor
optimization since we now check the boolean and skip doing a lookup
in an empty table.

											
										
										
											2020-07-04 05:03:50 +00:00
+								                            if (relOperandOffset >= 0) {
-												Use relocation data to format instruction operands

This was a relatively lightweight change to confirm the usefulness
of relocation data.  The results were very positive.

The relatively superficial integration of the data into the data
analysis process causes some problems, e.g. the cross-reference table
entries show an offset because the code analyzer's computed operand
offset doesn't match the value of the label.  The feature should be
considered experimental.

The feature can be enabled or disabled with a project property.  The
results were sufficiently useful and non-annoying to make the setting
enabled by default.

											
										
										
											2020-07-04 00:37:04 +00:00
+								                                // Determined a different offset.  Use that instead.
 								                                //Debug.WriteLine("REL +" + offset.ToString("x6") + " " +
 								                                //    reloc.Value.ToString("x6") + " vs. " +
 								                                //    attr.OperandAddress.ToString("x6"));
-												Apply relocation data to unformatted data

Works well for things like jump tables.  Seeing a bunch of these
scattered in a chunk of data is a decent signal that it's actually
code.

In a bold move, we now exclude PEA operands from auto-label gen when
they don't have relocation data.  This is very useful for things
like Int2Hex for which constants are typically pushed with PEA.

Reworked the "use reloc data" setting so it defaults to false and is
explicitly set to true when converting OMF.  This provides a minor
optimization since we now check the boolean and skip doing a lookup
in an empty table.

											
										
										
											2020-07-04 05:03:50 +00:00
+								                                WeakSymbolRef.Part part = ShiftToPart(reloc.Shift);
-												Use relocation data to format instruction operands

This was a relatively lightweight change to confirm the usefulness
of relocation data.  The results were very positive.

The relatively superficial integration of the data into the data
analysis process causes some problems, e.g. the cross-reference table
entries show an offset because the code analyzer's computed operand
offset doesn't match the value of the label.  The feature should be
considered experimental.

The feature can be enabled or disabled with a project property.  The
results were sufficiently useful and non-annoying to make the setting
enabled by default.

											
										
										
											2020-07-04 00:37:04 +00:00
+								                                SetDataTarget(offset, attr.Length, relOperandOffset, part);
 								                                continue;
 								                            }
 								                        }
-												Apply relocation data to unformatted data

Works well for things like jump tables.  Seeing a bunch of these
scattered in a chunk of data is a decent signal that it's actually
code.

In a bold move, we now exclude PEA operands from auto-label gen when
they don't have relocation data.  This is very useful for things
like Int2Hex for which constants are typically pushed with PEA.

Reworked the "use reloc data" setting so it defaults to false and is
explicitly set to true when converting OMF.  This provides a minor
optimization since we now check the boolean and skip doing a lookup
in an empty table.

											
										
										
											2020-07-04 05:03:50 +00:00
 								                        // No reloc for this instruction.  If it's a relative branch we need
 								                        // to do the usual stuff, but if it's a PEA we want to treat it like
-												Update relocation data handling

When we have relocation data available, the code currently skips the
process of matching an address with a label for a PEA instruction when
the instruction in question doesn't have reloc data.  This does a
great job of separating code that pushes parts of addresses from code
that pushes constants.

This change expands the behavior to exclude instructions with 16-bit
address operands that use the Data Bank Register, e.g. "LDA abs"
and "LDA abs,X".  This is particularly useful for code that accesses
structured data using the operand as the structure offset, e.g.
"LDX addr" / "LDA $0000,X"

The 20212-reloc-data test has been updated to check the behavior.

											
										
										
											2020-07-11 00:24:11 +00:00
+								                        // an immediate value.  It should also be safe and useful to halt
 								                        // processing for "LDA abs" and the like.
-												Apply relocation data to unformatted data

Works well for things like jump tables.  Seeing a bunch of these
scattered in a chunk of data is a decent signal that it's actually
code.

In a bold move, we now exclude PEA operands from auto-label gen when
they don't have relocation data.  This is very useful for things
like Int2Hex for which constants are typically pushed with PEA.

Reworked the "use reloc data" setting so it defaults to false and is
explicitly set to true when converting OMF.  This provides a minor
optimization since we now check the boolean and skip doing a lookup
in an empty table.

											
										
										
											2020-07-04 05:03:50 +00:00
+								                        OpDef op = mProject.CpuDef.GetOpDef(mProject.FileData[offset]);
-												Update relocation data handling

When we have relocation data available, the code currently skips the
process of matching an address with a label for a PEA instruction when
the instruction in question doesn't have reloc data.  This does a
great job of separating code that pushes parts of addresses from code
that pushes constants.

This change expands the behavior to exclude instructions with 16-bit
address operands that use the Data Bank Register, e.g. "LDA abs"
and "LDA abs,X".  This is particularly useful for code that accesses
structured data using the operand as the structure offset, e.g.
"LDX addr" / "LDA $0000,X"

The 20212-reloc-data test has been updated to check the behavior.

											
										
										
											2020-07-11 00:24:11 +00:00
+								                        bool stopHere = false;
 								                        switch (op.AddrMode) {
 								                            case OpDef.AddressMode.StackAbs:    // PEA
 								                            case OpDef.AddressMode.Abs:         // technically just non-PBR
 								                            case OpDef.AddressMode.AbsIndexX:
 								                            case OpDef.AddressMode.AbsIndexY:
 								                                stopHere = true;
 								                                break;
 								                            // AbsIndexXInd, AbsInd, AbsIndLong look like absolute addresses
 								                            // but use the program bank or bank 0.  They're unambiguous even
 								                            // without reloc data, so no need to block them.  That also goes
 								                            // for long addressing: ideally they'd have reloc data, but even if
 								                            // they don't, we might as well hook up a symbol because they can't
 								                            // mean anything else.  (I think.)
 								                        }
 								                        if (stopHere) {
-												Apply relocation data to unformatted data

Works well for things like jump tables.  Seeing a bunch of these
scattered in a chunk of data is a decent signal that it's actually
code.

In a bold move, we now exclude PEA operands from auto-label gen when
they don't have relocation data.  This is very useful for things
like Int2Hex for which constants are typically pushed with PEA.

Reworked the "use reloc data" setting so it defaults to false and is
explicitly set to true when converting OMF.  This provides a minor
optimization since we now check the boolean and skip doing a lookup
in an empty table.

											
										
										
											2020-07-04 05:03:50 +00:00
+								                            continue;
 								                        }
-												Use relocation data to format instruction operands

This was a relatively lightweight change to confirm the usefulness
of relocation data.  The results were very positive.

The relatively superficial integration of the data into the data
analysis process causes some problems, e.g. the cross-reference table
entries show an offset because the code analyzer's computed operand
offset doesn't match the value of the label.  The feature should be
considered experimental.

The feature can be enabled or disabled with a project property.  The
results were sufficiently useful and non-annoying to make the setting
enabled by default.

											
										
										
											2020-07-04 00:37:04 +00:00
+								                    }
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                    int operandOffset = attr.OperandOffset;
 								                    if (operandOffset >= 0) {
-												Apply relocation data to unformatted data

Works well for things like jump tables.  Seeing a bunch of these
scattered in a chunk of data is a decent signal that it's actually
code.

In a bold move, we now exclude PEA operands from auto-label gen when
they don't have relocation data.  This is very useful for things
like Int2Hex for which constants are typically pushed with PEA.

Reworked the "use reloc data" setting so it defaults to false and is
explicitly set to true when converting OMF.  This provides a minor
optimization since we now check the boolean and skip doing a lookup
in an empty table.

											
										
										
											2020-07-04 05:03:50 +00:00
+								                        // This is an offset reference: a branch or data access instruction
 								                        // whose target is inside the file.  Create a FormatDescriptor for it,
 								                        // and generate a label at the target if one is not already present.
-												Use relocation data to format instruction operands

This was a relatively lightweight change to confirm the usefulness
of relocation data.  The results were very positive.

The relatively superficial integration of the data into the data
analysis process causes some problems, e.g. the cross-reference table
entries show an offset because the code analyzer's computed operand
offset doesn't match the value of the label.  The feature should be
considered experimental.

The feature can be enabled or disabled with a project property.  The
results were sufficiently useful and non-annoying to make the setting
enabled by default.

											
										
										
											2020-07-04 00:37:04 +00:00
+								                        SetDataTarget(offset, attr.Length, operandOffset, WeakSymbolRef.Part.Low);
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                    }
 								                    // We advance by a single byte, rather than .Length, in case there's
 								                    // an instruction embedded inside another one.
 								                } else if (attr.DataDescriptor != null) {
 								                    // We can't check IsDataStart / IsInlineDataStart because the bytes might
 								                    // still be uncategorized.  If there's a user-specified format, check it
 								                    // to see if it's an address.
 								                    FormatDescriptor dfd = attr.DataDescriptor;
 								                    // Is this numeric/Address?
 								                    if ((dfd.FormatType == FormatDescriptor.Type.NumericLE ||
 								                            dfd.FormatType == FormatDescriptor.Type.NumericBE) &&
 								                            dfd.FormatSubType == FormatDescriptor.SubType.Address) {
 								                        // Treat like an absolute address.  Convert the operand
 								                        // to an address, then resolve the file offset.
 								                        int address = RawData.GetWord(mFileData, offset, dfd.Length,
 								                                (dfd.FormatType == FormatDescriptor.Type.NumericBE));
 								                        if (dfd.Length < 3) {
 								                            // Bank not specified by data, add current program bank.  Not always
 								                            // correct, but should be often enough.  In most cases we'd just
 								                            // assume a correct data bank register, but here we need to find
 								                            // a file offset, so we have to assume data bank == program bank
 								                            // (unless we find a good way to track the data bank register).
 								                            address |= attr.Address & 0x7fff0000;
 								                        }
 								                        int operandOffset = mProject.AddrMap.AddressToOffset(offset, address);
 								                        if (operandOffset >= 0) {
-												Use relocation data to format instruction operands

This was a relatively lightweight change to confirm the usefulness
of relocation data.  The results were very positive.

The relatively superficial integration of the data into the data
analysis process causes some problems, e.g. the cross-reference table
entries show an offset because the code analyzer's computed operand
offset doesn't match the value of the label.  The feature should be
considered experimental.

The feature can be enabled or disabled with a project property.  The
results were sufficiently useful and non-annoying to make the setting
enabled by default.

											
										
										
											2020-07-04 00:37:04 +00:00
+								                            SetDataTarget(offset, dfd.Length, operandOffset,
 								                                WeakSymbolRef.Part.Low);
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                        }
 								                    }
 								                    // For other formats, we don't need to do anything.  Numeric/Address is
 								                    // the only one that represents an offset reference.  Numeric/Symbol
 								                    // is a name reference.  The others are just data.
 								                    // There shouldn't be any data items inside other data items, so we
 								                    // can just skip forward.
 								                    offset += mAnattribs[offset].DataDescriptor.Length - 1;
-												Fix application of reloc info in data areas

The test wasn't correctly excluding instructions, so it was possible
to create a situation where a two-byte data item had an instruction
starting in the second byte.

We also weren't checking the length of the instruction to ensure that
it was wider than the reloc data.  This could get weird for an
immediate constant when the M/X flags are wrong.  When in doubt, don't
overwrite.

											
										
										
											2020-07-07 00:10:04 +00:00
+								                } else if (mAnalysisParams.UseRelocData && attr.IsUntyped &&
-												Apply relocation data to unformatted data

Works well for things like jump tables.  Seeing a bunch of these
scattered in a chunk of data is a decent signal that it's actually
code.

In a bold move, we now exclude PEA operands from auto-label gen when
they don't have relocation data.  This is very useful for things
like Int2Hex for which constants are typically pushed with PEA.

Reworked the "use reloc data" setting so it defaults to false and is
explicitly set to true when converting OMF.  This provides a minor
optimization since we now check the boolean and skip doing a lookup
in an empty table.

											
										
										
											2020-07-04 05:03:50 +00:00
+								                        mProject.RelocList.TryGetValue(offset,
 								                            out DisasmProject.RelocData reloc)) {
 								                    // Byte is unformatted, but there's relocation data here.  If the full
-												Fix label-trampling bug in reloc data handler

If code accesses the high/low parts of a 32-bit address value with
no label, it auto-generates labels for addr+2 and addr.  The reloc
handler was replacing the unformatted bytes with a single multi-byte
format, hiding the label at addr+2.

The easy fix is to have the reloc data handler skip the entry.  This
is less useful than other approaches, but much simpler.

Added a test to 20212-reloc-data.

											
										
										
											2020-07-10 20:56:07 +00:00
+								                    // range of bytes is unformatted and unlabeled, create a symbolic reference.
 								                    // TODO: we can do better here when a multi-byte reloc has an auto-generated
 								                    // label mid-way through: create multiple, smaller formats for the same sym.
 								                    // Or don't generate auto labels until all reloc-based formats are placed.
-												Apply relocation data to unformatted data

Works well for things like jump tables.  Seeing a bunch of these
scattered in a chunk of data is a decent signal that it's actually
code.

In a bold move, we now exclude PEA operands from auto-label gen when
they don't have relocation data.  This is very useful for things
like Int2Hex for which constants are typically pushed with PEA.

Reworked the "use reloc data" setting so it defaults to false and is
explicitly set to true when converting OMF.  This provides a minor
optimization since we now check the boolean and skip doing a lookup
in an empty table.

											
										
										
											2020-07-04 05:03:50 +00:00
+								                    bool allClear = true;
 								                    for (int i = 1; i < reloc.Width; i++) {
-												Fix application of reloc info in data areas

The test wasn't correctly excluding instructions, so it was possible
to create a situation where a two-byte data item had an instruction
starting in the second byte.

We also weren't checking the length of the instruction to ensure that
it was wider than the reloc data.  This could get weird for an
immediate constant when the M/X flags are wrong.  When in doubt, don't
overwrite.

											
										
										
											2020-07-07 00:10:04 +00:00
+								                        if (!mAnattribs[offset + i].IsUntyped ||
-												Fix label-trampling bug in reloc data handler

If code accesses the high/low parts of a 32-bit address value with
no label, it auto-generates labels for addr+2 and addr.  The reloc
handler was replacing the unformatted bytes with a single multi-byte
format, hiding the label at addr+2.

The easy fix is to have the reloc data handler skip the entry.  This
is less useful than other approaches, but much simpler.

Added a test to 20212-reloc-data.

											
										
										
											2020-07-10 20:56:07 +00:00
+								                                mAnattribs[offset + i].DataDescriptor != null ||
 								                                mAnattribs[offset + i].Symbol != null) {
-												Apply relocation data to unformatted data

Works well for things like jump tables.  Seeing a bunch of these
scattered in a chunk of data is a decent signal that it's actually
code.

In a bold move, we now exclude PEA operands from auto-label gen when
they don't have relocation data.  This is very useful for things
like Int2Hex for which constants are typically pushed with PEA.

Reworked the "use reloc data" setting so it defaults to false and is
explicitly set to true when converting OMF.  This provides a minor
optimization since we now check the boolean and skip doing a lookup
in an empty table.

											
										
										
											2020-07-04 05:03:50 +00:00
+								                            allClear = false;
 								                            break;
 								                        }
 								                    }
 								                    if (allClear) {
 								                        int operandOffset = mProject.AddrMap.AddressToOffset(offset, reloc.Value);
 								                        if (operandOffset >= 0) {
 								                            //Debug.WriteLine("DREL +" + offset.ToString("x6") + " val=" +
 								                            //    reloc.Value.ToString("x6") +
 								                            //    " opOff=" + operandOffset.ToString("x6"));
 								                            SetDataTarget(offset, reloc.Width, operandOffset,
 								                                ShiftToPart(reloc.Shift));
 								                        }
 								                    }
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                }
 								            }
 								        }
-												Apply relocation data to unformatted data

Works well for things like jump tables.  Seeing a bunch of these
scattered in a chunk of data is a decent signal that it's actually
code.

In a bold move, we now exclude PEA operands from auto-label gen when
they don't have relocation data.  This is very useful for things
like Int2Hex for which constants are typically pushed with PEA.

Reworked the "use reloc data" setting so it defaults to false and is
explicitly set to true when converting OMF.  This provides a minor
optimization since we now check the boolean and skip doing a lookup
in an empty table.

											
										
										
											2020-07-04 05:03:50 +00:00
+								        /// <summary>
 								        /// Selects the symbol part that corresponds to a relocation shift value.
 								        /// </summary>
 								        /// <param name="shift">Shift value; callers in this file pass reloc.Shift.</param>
 								        /// <returns>Bank for -16, High for -8, and Low for every other value.</returns>
 								        private static WeakSymbolRef.Part ShiftToPart(int shift) {
 								            switch (shift) {
 								                case -16:
 								                    return WeakSymbolRef.Part.Bank;
 								                case -8:
 								                    return WeakSymbolRef.Part.High;
 								                default:
 								                    // Every other shift value is treated as a low-part reference.
 								                    return WeakSymbolRef.Part.Low;
 								            }
 								        }
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								        /// <summary>
 								        /// Extracts the operand offset from a data item.  Only useful for numeric/Address
 								        /// and numeric/Symbol.
 								        /// </summary>
 								        /// <param name="proj">Project reference.</param>
 								        /// <param name="offset">Offset of data item.</param>
 								        /// <returns>Operand offset, or -1 if not applicable.</returns>
 								        public static int GetDataOperandOffset(DisasmProject proj, int offset) {
 								            Anattrib attr = proj.GetAnattrib(offset);
 								            // Only the first byte of a data or inline-data item is of interest.
 								            bool isItemStart = attr.IsDataStart || attr.IsInlineDataStart;
 								            if (!isItemStart) {
 								                return -1;
 								            }

 								            FormatDescriptor dfd = attr.DataDescriptor;

 								            // The item must be numeric (either byte order)...
 								            bool isBigEndian = (dfd.FormatType == FormatDescriptor.Type.NumericBE);
 								            if (!isBigEndian && dfd.FormatType != FormatDescriptor.Type.NumericLE) {
 								                return -1;
 								            }
 								            // ...and its sub-type must be Address or Symbol.
 								            if (dfd.FormatSubType != FormatDescriptor.SubType.Address &&
 								                    dfd.FormatSubType != FormatDescriptor.SubType.Symbol) {
 								                return -1;
 								            }

 								            // Handle the value as an absolute address: read it from the file data,
 								            // then map the address back to a file offset.
 								            int address = RawData.GetWord(proj.FileData, offset, dfd.Length, isBigEndian);
 								            if (dfd.Length < 3) {
 								                // The data has no bank byte, so borrow the bank of the item's own
 								                // address (program bank standing in for the data bank).  Not perfect,
 								                // but nothing better is available at the moment.
 								                address |= attr.Address & 0x7fff0000;
 								            }
 								            return proj.AddrMap.AddressToOffset(offset, address);
 								        }
 								        /// <summary>
 								        /// Returns the "base" operand offset.  If the byte at the specified offset is not the
 								        /// start of a code/data/inline-data item, walk backward until the start is found.
 								        /// </summary>
 								        /// <param name="proj">Project reference.</param>
 								        /// <param name="offset">Start offset.</param>
-												Instruction operand editor rework, part 3

Implemented editing of labels and project symbols.

Also, cleaned up the local variable edit code.

											
										
										
											2019-09-08 23:41:54 +00:00
+								        /// <returns>Base offset.</returns>
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								        public static int GetBaseOperandOffset(DisasmProject proj, int offset) {
 								            Debug.Assert(offset >= 0 && offset < proj.FileDataLength);
 								            int cursor = offset;
 								            while (true) {
 								                if (proj.GetAnattrib(cursor).IsStart) {
 								                    // Reached the first byte of the enclosing item.
 								                    return cursor;
 								                }
 								                // Still in the interior of an item; step back one byte.  Walking
 								                // off the top of the list should be impossible, because an interior
 								                // byte always has a start byte somewhere before it.
 								                cursor--;
 								                Debug.Assert(cursor >= 0);
 								            }
 								        }
 								        /// <summary>
 								        /// Creates a FormatDescriptor in the Anattrib array at srcOffset that links to
 								        /// targetOffset, or a nearby label.  If targetOffset doesn't have a useful label,
 								        /// one will be generated.
 								        ///
 								        /// This is used for both instruction and data operands.
 								        /// </summary>
 								        /// <param name="srcOffset">Offset of instruction or address data.</param>
 								        /// <param name="srcLen">Length of instruction or data item.</param>
 								        /// <param name="targetOffset">Offset of target.</param>
-												Use relocation data to format instruction operands

This was a relatively lightweight change to confirm the usefulness
of relocation data.  The results were very positive.

The relatively superficial integration of the data into the data
analysis process causes some problems, e.g. the cross-reference table
entries show an offset because the code analyzer's computed operand
offset doesn't match the value of the label.  The feature should be
considered experimental.

The feature can be enabled or disabled with a project property.  The
results were sufficiently useful and non-annoying to make the setting
enabled by default.

											
										
										
											2020-07-04 00:37:04 +00:00
+								        private void SetDataTarget(int srcOffset, int srcLen, int targetOffset,
 								                WeakSymbolRef.Part part) {
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								            // NOTE: don't try to cache mAnattribs[targetOffset] -- we may be changing
 								            // targetOffset and/or altering the Anattrib entry, so grabbing a copy of the
 								            // struct may lead to problems.
 								            // If the target offset has a symbol assigned, use it.  Otherwise, try to
 								            // find something nearby that might be more appropriate.
 								            int origTargetOffset = targetOffset;
-												Performance tweak

The Visual Studio performance profiler showed the FormatDescriptor
equality test being called quite a lot.  The test was vs. null, so
a simple change from "==" to "is" improved performance dramatically.

Fixing the underlying issue with a better data structure is still
important, but this provided a big boost with little effort.

											
										
										
											2020-07-07 19:09:00 +00:00
+								            if (mAnattribs[targetOffset].Symbol is null) {
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                if (mAnalysisParams.SeekNearbyTargets) {
 								                    targetOffset = FindAlternateTarget(srcOffset, targetOffset);
 								                }
 								                // If we're not interested in seeking nearby targets, or we are but we failed
 								                // to find something useful, we need to make sure that we're not pointing
 								                // into the middle of the instruction.  The assembler will only see labels on
 								                // the opcode bytes, so if we're pointing at the middle we need to back up.
 								                if (mAnattribs[targetOffset].IsInstruction &&
 								                        !mAnattribs[targetOffset].IsInstructionStart) {
 								                    while (!mAnattribs[--targetOffset].IsInstructionStart) {
 								                        // Should not be possible to move past the start of the file,
 								                        // since we know we're in the middle of an instruction.
 								                        Debug.Assert(targetOffset > 0);
 								                    }
 								                } else if (!mAnattribs[targetOffset].IsInstruction &&
 								                            !mAnattribs[targetOffset].IsStart) {
 								                    // This is not part of an instruction, and is not the start of a formatted
 								                    // data area.  However, it might be part of a formatted data area, in which
 								                    // case we need to avoid creating an auto label in the middle.  So we seek
 								                    // backward, looking for the first offset with a descriptor.  If that
 								                    // descriptor includes this offset, we set the target offset to that.
 								                    // (Note the uncategorized data pass hasn't run yet, so only instructions
 								                    // and offsets identified by users or scripts have been categorized.)
-												Instruction operand editor rework, part 3

Implemented editing of labels and project symbols.

Also, cleaned up the local variable edit code.

											
										
										
											2019-09-08 23:41:54 +00:00
+								                    //
 								                    // ?? Can we use GetBaseOperandOffset(), which searches for IsStart?
-												Fix application of reloc info in data areas

The test wasn't correctly excluding instructions, so it was possible
to create a situation where a two-byte data item had an instruction
starting in the second byte.

We also weren't checking the length of the instruction to ensure that
it was wider than the reloc data.  This could get weird for an
immediate constant when the M/X flags are wrong.  When in doubt, don't
overwrite.

											
										
										
											2020-07-07 00:10:04 +00:00
+								                    //
 								                    // TODO(performance): we spend a significant amount of time in this loop.
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                    int scanOffset = targetOffset;
-												Fix a couple of obscure bugs

The code that checked to see if a data target was inside a data
operand wasn't going all the way back to the start of the file.
It was also failing to stop when it should, wasting time.

The anattrib validation method has code that avoids a false-positive
on certain complex embedded instruction arrangements.  This was also
preventing it from seeing a transition from a data area to the
middle of an instruction (caused by issue #45).

											
										
										
											2019-09-05 00:48:55 +00:00
+								                    while (--scanOffset >= 0) {
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                        FormatDescriptor dfd = mAnattribs[scanOffset].DataDescriptor;
-												Performance tweak

The Visual Studio performance profiler showed the FormatDescriptor
equality test being called quite a lot.  The test was vs. null, so
a simple change from "==" to "is" improved performance dramatically.

Fixing the underlying issue with a better data structure is still
important, but this provided a big boost with little effort.

											
										
										
											2020-07-07 19:09:00 +00:00
+								                        if (!(dfd is null)) {
-												Fix a couple of obscure bugs

The code that checked to see if a data target was inside a data
operand wasn't going all the way back to the start of the file.
It was also failing to stop when it should, wasting time.

The anattrib validation method has code that avoids a false-positive
on certain complex embedded instruction arrangements.  This was also
preventing it from seeing a transition from a data area to the
middle of an instruction (caused by issue #45).

											
										
										
											2019-09-05 00:48:55 +00:00
+								                            if (scanOffset + dfd.Length > targetOffset) {
 								                                // Found a descriptor that encompasses target offset.  Adjust
 								                                // target to point at the start of the region.
 								                                targetOffset = scanOffset;
 								                            }
 								                            // Descriptors aren't allowed to overlap, so either way we're done.
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                            break;
 								                        }
 								                    }
 								                }
 								            }
 								            if (mAnattribs[targetOffset].Symbol == null) {
 								                // No label at target offset, generate one.
 								                //
 								                // Generally speaking, the label we generate will be unique, because it
 								                // incorporates the address.  It's possible through various means to end
 								                // up with a user or platform label that matches an auto label, so we
 								                // need to do some renaming in that case.  Shouldn't happen often.
 								                Symbol sym = AutoLabel.GenerateUniqueForAddress(mAnattribs[targetOffset].Address,
 								                    mProject.SymbolTable, "L");
 								                mAnattribs[targetOffset].Symbol = sym;
 								                // This will throw if the symbol already exists.  That is the desired
 								                // behavior, as that would be a bug.
 								                mProject.SymbolTable.Add(sym);
 								            }
 								            // Create a Numeric/Symbol descriptor that references the target label.  If the
 								            // source offset already had a descriptor (e.g. Numeric/Address data item),
 								            // this will replace it in the Anattrib array.  (The user-specified format
 								            // is unaffected.)
 								            //
 								            // Doing this by target symbol, rather than offset in a Numeric/Address item,
 								            // allows us to avoid carrying the adjustment stuff everywhere.  OTOH we have
 								            // to manually refactor label renames in the display list if we don't want to
 								            // redo the data analysis.
 								            bool isBigEndian = false;
 								            if (mAnattribs[srcOffset].DataDescriptor != null) {
 								                LogD(srcOffset, "Replacing " + mAnattribs[srcOffset].DataDescriptor +
 								                    " with reference to " + mAnattribs[targetOffset].Symbol.Label +
 								                    ", adj=" + (origTargetOffset - targetOffset));
 								                if (mAnattribs[srcOffset].DataDescriptor.FormatType ==
 								                        FormatDescriptor.Type.NumericBE) {
 								                    isBigEndian = true;
 								                }
 								            } else {
 								                LogV(srcOffset, "Creating weak reference to label " +
 								                    mAnattribs[targetOffset].Symbol.Label +
 								                    ", adj=" + (origTargetOffset - targetOffset));
 								            }
 								            mAnattribs[srcOffset].DataDescriptor = FormatDescriptor.Create(srcLen,
-												Use relocation data to format instruction operands

This was a relatively lightweight change to confirm the usefulness
of relocation data.  The results were very positive.

The relatively superficial integration of the data into the data
analysis process causes some problems, e.g. the cross-reference table
entries show an offset because the code analyzer's computed operand
offset doesn't match the value of the label.  The feature should be
considered experimental

The feature can be enabled or disabled with a project property.  The
results were sufficiently useful and non-annoying to make the setting
enabled by default.

											
										
										
											2020-07-04 00:37:04 +00:00
+								                new WeakSymbolRef(mAnattribs[targetOffset].Symbol.Label, part), isBigEndian);
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								        }
 								        /// <summary>
 								        /// Looks for a better place to anchor a reference that runs from srcOffset to
 								        /// targetOffset.  A reference that lands in the middle of an instruction, for
 								        /// instance, is usually better expressed relative to the instruction's first byte,
 								        /// and a data reference that lands next to a user label is better expressed
 								        /// relative to that label.
 								        /// </summary>
 								        /// <param name="srcOffset">Reference source.</param>
 								        /// <param name="targetOffset">Reference target.</param>
 								        /// <returns>The preferred target offset; this is the original targetOffset when
 								        ///   no better candidate was found.</returns>
 								        private int FindAlternateTarget(int srcOffset, int targetOffset) {
 								            int initialTarget = targetOffset;

 								            // Examines one candidate offset near a data target.  Yields true if it holds
 								            // a user label that is address-contiguous with the target, false if it holds
 								            // a user label on the far side of an address change, and null if it holds no
 								            // user label at all.
 								            bool? ProbeUserLabel(int probe, int origin) {
 								                Symbol sym = mAnattribs[probe].Symbol;
 								                if (sym != null && sym.SymbolSource == Symbol.Source.User) {
 								                    // Found a nearby user label.  Make sure it's actually nearby.
 								                    int addrDiff = mAnattribs[origin].Address - mAnattribs[probe].Address;
 								                    if (addrDiff == origin - probe) {
 								                        return true;
 								                    }
 								                    Debug.WriteLine("NOT probing past address boundary change (src=+" +
 								                        srcOffset.ToString("x6") +
 								                        " targ=+" + origin.ToString("x6") +
 								                        " probe=+" + probe.ToString("x6") + ")");
 								                    return false;
 								                }
 								                return null;
 								            }

 								            // True when the offset is not an instruction start but the byte right after
 								            // it is, i.e. the reference looks like "address - 1".
 								            bool PrecedesInstructionStart(int offset) =>
 								                !mAnattribs[offset].IsInstructionStart &&
 								                offset + 1 < mAnattribs.Length &&
 								                mAnattribs[offset + 1].IsInstructionStart;

 								            // Case 1: the target is not part of the instruction stream.  Only a simple
 								            // search for an adjacent user label is done.
 								            if (!mAnattribs[targetOffset].IsInstruction) {
 								                // User-defined labels are preferred whenever the access is within a few
 								                // bytes of one.  This is not done for code references, because it would
 								                // make branches look strange.
 								                //
 								                // Candidates behind the target (rendered as LABEL+N) are tried before
 								                // the one ahead of it (rendered as LABEL-N).
 								                //
 								                // TODO(someday): make parameters user-configurable?
 								                const int MAX_FWD = 1;
 								                const int MAX_BACK = 3;

 								                int lowestProbe = Math.Max(0, targetOffset - MAX_BACK);
 								                for (int probe = targetOffset - 1; probe >= lowestProbe; probe--) {
 								                    bool? verdict = ProbeUserLabel(probe, targetOffset);
 								                    if (verdict.HasValue) {
 								                        if (verdict.Value) {
 								                            return probe;
 								                        }
 								                        // Address discontinuity: give up on this direction, but the
 								                        // forward candidate still deserves a look.
 								                        break;
 								                    }
 								                }

 								                int highestProbe = Math.Min(mAnattribs.Length - 1, targetOffset + MAX_FWD);
 								                for (int probe = targetOffset + 1; probe <= highestProbe; probe++) {
 								                    bool? verdict = ProbeUserLabel(probe, targetOffset);
 								                    if (verdict.HasValue) {
 								                        if (verdict.Value) {
 								                            return probe;
 								                        }
 								                        break;
 								                    }
 								                }
 								                return targetOffset;
 								            }

 								            // Case 2: the target is an instruction but the source is a data element
 								            // (e.g. ".dd2 <addr>").  It might hold address-1 to set up an RTS.
 								            if (!mAnattribs[srcOffset].IsInstructionStart) {
 								                if (PrecedesInstructionStart(targetOffset)) {
 								                    LogD(srcOffset, "Offsetting address reference");
 								                    targetOffset++;
 								                }
 								                return targetOffset;
 								            }

 								            // Case 3: an instruction referencing an instruction.  Could be a branch, an
 								            // address push, or self-modifying code.
 								            OpDef op = mProject.CpuDef.GetOpDef(mProject.FileData[srcOffset]);
 								            if (!op.IsBranchOrSubCall) {
 								                // (Jumps and branches are left alone -- they always go directly to the
 								                // target address.)
 								                if (op == OpDef.OpPEA_StackAbs || op == OpDef.OpPER_StackPCRelLong) {
 								                    // Possibly pushing address-1 to set up an RTS.
 								                    if (PrecedesInstructionStart(targetOffset)) {
 								                        LogD(srcOffset, "Offsetting PEA/PER");
 								                        targetOffset++;
 								                    }
 								                } else {
 								                    // Data operation (LDA, STA, etc).  Either self-modifying code, or an
 								                    // indexed access with an offset base address (LDA addr-1,Y) into an
 								                    // adjacent data area.  See whether data follows within two bytes.
 								                    int dataOffset = -1;
 								                    for (int i = targetOffset + 1; i <= targetOffset + 2; i++) {
 								                        if (i < mAnattribs.Length && !mAnattribs[i].IsInstruction) {
 								                            dataOffset = i;
 								                            break;
 								                        }
 								                    }

 								                    if (dataOffset >= 0) {
 								                        targetOffset = dataOffset;
 								                    } else if (!mAnattribs[targetOffset].IsInstructionStart) {
 								                        // No data nearby and the target is mid-instruction, so this is
 								                        // probably self-modifying code.  The label belongs on the opcode,
 								                        // so back up to the start of the instruction.
 								                        targetOffset--;
 								                        while (!mAnattribs[targetOffset].IsInstructionStart) {
 								                            // Should not be possible to move past the start of the file,
 								                            // since we know we're in the middle of an instruction.
 								                            Debug.Assert(targetOffset > 0);
 								                            targetOffset--;
 								                        }
 								                    }
 								                }
 								            }

 								            if (targetOffset != initialTarget) {
 								                LogV(srcOffset, "Creating instruction ref adj=" +
 								                    (initialTarget - targetOffset));
 								            }
 								            return targetOffset;
 								        }
 								        /// <summary>
 								        /// Analyzes uncategorized regions of the file to see if they fit common patterns.
 								        ///
 								        /// This is re-run after most changes to the project, so we don't want to do anything
 								        /// crazily expensive.
 								        /// </summary>
 								        /// <remarks>Nothing is returned; the Anattrib entries are updated in place.</remarks>
 								        public void AnalyzeUncategorized() {
 								            FormatDescriptor oneByteDefault = FormatDescriptor.Create(1,
 								                FormatDescriptor.Type.Default, FormatDescriptor.SubType.None);
 								            FormatDescriptor.DebugPrefabBump(-1);
 								            // If it hasn't been identified as code or data, set the "data" flag to
 								            // give it a positive identification as data.  (This should be the only
 								            // place outside of CodeAnalysis that sets this flag.)  This isn't strictly
 								            // necessary, but it helps us assert things when pieces start moving around.
 								            for (int offset = 0; offset < mAnattribs.Length; offset++) {
 								                Anattrib attr = mAnattribs[offset];
 								                if (attr.IsInlineData) {
 								                    // While we're here, add a default format descriptor for inline data
 								                    // that doesn't have one.  We don't try to analyze it otherwise.
 								                    if (attr.DataDescriptor == null) {
 								                        mAnattribs[offset].DataDescriptor = oneByteDefault;
 								                        FormatDescriptor.DebugPrefabBump();
 								                    }
 								                } else if (!attr.IsInstruction) {
 								                    mAnattribs[offset].IsData = true;
 								                }
 								            }
 								            mDebugLog.LogI("Analyzing uncategorized data...");
 								            int startOffset = -1;
 								            for (int offset = 0; offset < mAnattribs.Length; ) {
 								                // We want to find a contiguous series of offsets which are not known
-												Correct handling of no-op .ORG statements

These were being overlooked because they didn't actually cause
anything to happen (a no-op .ORG sets the address to what it would
already have been).  The assembly source generator works in a way
that causes them to be skipped, so everybody was happy.

This seemed like the sort of thing that was likely to cause problems
down the road, however, so we now split regions correctly when a
no-op .ORG is encountered.  This affects the uncategorized data
analyzer and selection grouping.

This changed the behavior of the 2004-numeric-types test, which was
visibly weird in the UI but generated correct output.

Added the 2024-ui-edge-cases test to provide a place to exercise
edge cases when testing the UI by hand.  It has some value for the
automated regression test, so it's included there.

Also, changed the AddressMapEntry objects to be immutable.  This
is handy when passing lists of them around.

											
										
										
											2020-02-28 22:43:03 +00:00
+								                // to hold code or data.  We stop if we encounter a user-defined label,
 								                // format descriptor, or address override.
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                Anattrib attr = mAnattribs[offset];
 								                if (attr.IsInstruction || attr.IsInlineData || attr.IsDataStart) {
 								                    // Instruction, inline data, or formatted data known to be here.  Analyze
 								                    // previous chunk, then advance past this.
 								                    if (startOffset >= 0) {
 								                        AnalyzeRange(startOffset, offset - 1);
 								                        startOffset = -1;
 								                    }
 								                    if (attr.IsInstruction) {
 								                        // Because of embedded instructions, we can't simply leap forward.
 								                        // [or can we?]
 								                        offset++;
 								                    } else {
 								                        Debug.Assert(attr.Length > 0);
 								                        offset += attr.Length;
 								                    }
-												Visualizer improvements

Various changes:
- Generally treat visualization sets like long comments and notes
  when it comes to defining data region boundaries.  (We were doing
  this for selections; now we're also doing it for format-as-word
  and in the data analyzer when scanning for strings/fill.)
- Clear the visualization cache when the address map is altered.
  This is necessary for visualizers that dereference addresses.
- Read the Apple II screen image from a series of addresses rather
  than a series of offsets.  This allows it to work when the image
  is contiguous in memory but split into chunks in the file.
- Put 1 pixel of padding around the images in the main code list,
  so they don't blend into the background.
- Remember the last visualizer used, so we can re-use it the next
  time the user selects "new".
- Move min-size hack from Loaded to ContentRendered, as it apparently
  spoils CenterOwner placement.

											
										
										
											2019-12-06 22:49:35 +00:00
+								                } else if (attr.Symbol != null || mProject.HasCommentNoteOrVis(offset)) {
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                    // In an uncategorized area, but we want to break at this byte
 								                    // so the user or auto label doesn't get buried in the middle of
 								                    // a large chunk.
 								                    //
 								                    // This is similar to, but independent of, GroupedOffsetSetFromSelected()
 								                    // in ProjectView.  This is for auto-detection, the other is for user
 								                    // selection.  It's best if the two behave similarly though.
 								                    if (startOffset >= 0) {
 								                        AnalyzeRange(startOffset, offset - 1);
 								                    }
 								                    startOffset = offset;
 								                    offset++;
 								                } else {
 								                    // This offset is uncategorized, keep gathering.
 								                    if (startOffset < 0) {
 								                        startOffset = offset;
 								                    }
 								                    offset++;
-												Correct handling of no-op .ORG statements

These were being overlooked because they didn't actually cause
anything to happen (a no-op .ORG sets the address to what it would
already have been).  The assembly source generator works in a way
that causes them to be skipped, so everybody was happy.

This seemed like the sort of thing that was likely to cause problems
down the road, however, so we now split regions correctly when a
no-op .ORG is encountered.  This affects the uncategorized data
analyzer and selection grouping.

This changed the behavior of the 2004-numeric-types test, which was
visibly weird in the UI but generated correct output.

Added the 2024-ui-edge-cases test to provide a place to exercise
edge cases when testing the UI by hand.  It has some value for the
automated regression test, so it's included there.

Also, changed the AddressMapEntry objects to be immutable.  This
is handy when passing lists of them around.

											
										
										
											2020-02-28 22:43:03 +00:00
+								                    // Check to see if we just crossed an address change.
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                    if (offset < mAnattribs.Length &&
-												Correct handling of no-op .ORG statements

These were being overlooked because they didn't actually cause
anything to happen (a no-op .ORG sets the address to what it would
already have been).  The assembly source generator works in a way
that causes them to be skipped, so everybody was happy.

This seemed like the sort of thing that was likely to cause problems
down the road, however, so we now split regions correctly when a
no-op .ORG is encountered.  This affects the uncategorized data
analyzer and selection grouping.

This changed the behavior of the 2004-numeric-types test, which was
visibly weird in the UI but generated correct output.

Added the 2024-ui-edge-cases test to provide a place to exercise
edge cases when testing the UI by hand.  It has some value for the
automated regression test, so it's included there.

Also, changed the AddressMapEntry objects to be immutable.  This
is handy when passing lists of them around.

											
										
										
											2020-02-28 22:43:03 +00:00
+								                            !mProject.AddrMap.IsSingleAddrRange(offset - 1, 2)) {
 								                        // Must be an ORG here.  End region and scan.
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                        AnalyzeRange(startOffset, offset - 1);
 								                        startOffset = -1;
 								                    }
 								                }
 								            }
-												Correct handling of no-op .ORG statements

These were being overlooked because they didn't actually cause
anything to happen (a no-op .ORG sets the address to what it would
already have been).  The assembly source generator works in a way
that causes them to be skipped, so everybody was happy.

This seemed like the sort of thing that was likely to cause problems
down the road, however, so we now split regions correctly when a
no-op .ORG is encountered.  This affects the uncategorized data
analyzer and selection grouping.

This changed the behavior of the 2004-numeric-types test, which was
visibly weird in the UI but generated correct output.

Added the 2024-ui-edge-cases test to provide a place to exercise
edge cases when testing the UI by hand.  It has some value for the
automated regression test, so it's included there.

Also, changed the AddressMapEntry objects to be immutable.  This
is handy when passing lists of them around.

											
										
										
											2020-02-28 22:43:03 +00:00
 								            // Do the last bit.
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								            if (startOffset >= 0) {
 								                AnalyzeRange(startOffset, mAnattribs.Length - 1);
 								            }
 								        }
 								        /// <summary>
 								        /// Analyzes a range of bytes, looking for opportunities to promote uncategorized
 								        /// data to a more structured form.
 								        /// </summary>
 								        /// <param name="start">Offset of first byte in range.</param>
 								        /// <param name="end">Offset of last byte in range.</param>
 								        private void AnalyzeRange(int start, int end) {
 								            // We want to identify runs of identical bytes, and runs of more than N human-
 								            // readable characters (ASCII, high ASCII, PETSCII, whatever).  There are a few
 								            // ways to do this.
 								            //
 								            // The simple approach is to walk through the data from start to end, checking at
 								            // each offset for runs of bytes matching the criteria.  Because the data doesn't
 								            // change, we can pre-analyze the data at project load time to speed things up.
 								            //
 								            // One approach is to put runs into TypedRangeSet (setting the type to the byte
 								            // value so a run of 0x00 doesn't merge into an adjacent run of 0x01), and the
 								            // various character encodings into individual RangeSets.  Then, for any given
 								            // byte address, you can query the length of a potential run directly.  This could
 								            // be made faster with a mergesort-like algorithm that walked through the various
 								            // range sets, rather than iterating over every byte in the range.  However, the
 								            // ranges passed into this method tend to be small, so the initial setup time for
 								            // each region can dominate the performance.  (The optimized implementation of this
 								            // approach is also fairly complicated.)
 								            //
 								            // A memory-hungry alternative is to create arrays of integers, one entry per byte
 								            // in the file, and set each entry to the number of bytes in the run that would
 								            // follow at that point.  So if a run of 20 zeroes began at offset 5, you would
 								            // set run[5]=20, run[6]=19, and so on.  That avoids searching in the sets, at the
 								            // cost of potentially several megabytes for a large 65816 file.
 								            //
 								            // It's even possible that Regex would handle this faster and more easily.  This
 								            // can be done fairly quickly with "unsafe" code, e.g.:
 								            //   https://stackoverflow.com/questions/3028768/net-regular-expressions-on-bytes-instead-of-chars
 								            //   https://stackoverflow.com/questions/1660694/regular-expression-to-match-any-character-being-repeated-more-than-10-times
 								            //
 								            // Ultimately we're just not spending that much time here.  Setting
 								            // AnalyzeUncategorizedData=false reveals that most of the time is spent in
 								            // the caller, identifying the regions, so a significant improvement here won't
 								            // have much impact on the user experience.
 								            //
 								            // Vague idea: figure out how to re-use the results from the previous analysis
 								            // pass.  At a superficial level we can cache the result of calling here with a
 								            // particular (start, end) pair.  At a higher level we may be able to avoid
 								            // the search for uncategorized data, certainly at the bank level, possibly within
 								            // a bank.
 								            mDebugLog.LogI("Analyzing [+" + start.ToString("x6") + ",+" + end.ToString("x6") +"]");
 								            FormatDescriptor oneByteDefault = FormatDescriptor.Create(1,
 								                        FormatDescriptor.Type.Default, FormatDescriptor.SubType.None);
 								            FormatDescriptor.DebugPrefabBump(-1);
 								            if (!mAnalysisParams.AnalyzeUncategorizedData) {
 								                // Analysis is disabled, so just mark everything as single-byte data.
 								                while (start <= end) {
 								                    mAnattribs[start].DataDescriptor = oneByteDefault;
 								                    FormatDescriptor.DebugPrefabBump();
 								                    start++;
 								                }
 								                return;
 								            }
 								            int minStringChars = mAnalysisParams.MinCharsForString;
-												Add multiple encoding support to uncategorized data analyzer

The code that searches for character strings in uncategorized data
now recognizes the C64 encodings when selected in the project
properties.

The new code avoids some redundant comparisons when runs of
printable characters are found.  I suspect the new implementation
loses on overall performance because we're now calling through
delegates instead of testing characters directly, but I haven't
tested for that.

											
										
										
											2019-08-13 21:08:27 +00:00
+								#if DATA_PRESCAN   // this is actually slower (and uses more memory)
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								            while (start <= end) {
 								                // This is used to let us skip forward.  It starts past the end of the block,
 								                // and moves backward as we identify potential points of interest.
 								                int minNextStart = end + 1;
 								                bool found = mProject.RepeatedBytes.GetContainingOrSubsequentRange(start,
 								                        out TypedRangeSet.TypedRange tyRange);
 								                if (found) {
 								                    if (tyRange.Low <= start) {
 								                        // found a matching range
 								                        Debug.Assert(tyRange.Low <= start && tyRange.High >= start);
 								                        int clampEnd = Math.Min(tyRange.High, end);
 								                        int repLen = clampEnd - start + 1;
 								                        if (repLen >= MIN_RUN_LENGTH) {
 								                            bool isAscii =
 								                                TextUtil.IsPrintableAscii((char)(mFileData[start] & 0x7f));
 								                            // IF the run isn't ASCII, OR it's so long that we don't want to
 								                            // encode it as a string, OR it's so short that we don't want to
 								                            // treat it as a string, THEN output it as a run.  Otherwise, just
 								                            // let the ASCII-catcher handle it later.
 								                            if (!isAscii ||
 								                                    repLen > MIN_RUN_LENGTH_ASCII || repLen < minStringChars) {
 								                                LogV(start, "Run of 0x" + mFileData[start].ToString("x2") + ": " +
 								                                    repLen + " bytes");
 								                                mAnattribs[start].DataDescriptor = FormatDescriptor.Create(
 								                                    repLen, FormatDescriptor.Type.Fill,
 								                                    FormatDescriptor.SubType.None);
 								                                start += repLen;
 								                                continue;
 								                            }
 								                        }
 								                        // We didn't like this range.  We probably won't like it for any other
 								                        // point within the range, so start again past it.  Ideally we'd use
 								                        // Range.Low of the range that followed the one that was returned, but
 								                        // we don't have that handy.
 								                        minNextStart = Math.Min(minNextStart, tyRange.High + 1);
 								                    } else {
 								                        // no match; try to advance to the start of the next range.
 								                        Debug.Assert(tyRange.Low > start);
 								                        minNextStart = Math.Min(minNextStart, tyRange.Low);
 								                    }
 								                }
 								                found = mProject.StdAsciiBytes.GetContainingOrSubsequentRange(start,
 								                        out RangeSet.Range range);
 								                if (found) {
 								                    if (range.Low <= start) {
 								                        // found a matching range
 								                        Debug.Assert(range.Low <= start && range.High >= start);
 								                        int clampEnd = Math.Min(range.High, end);
 								                        int repLen = clampEnd - start + 1;
 								                        if (repLen >= minStringChars) {
 								                            LogV(start, "Std ASCII string, len=" + repLen + " bytes");
 								                            mAnattribs[start].DataDescriptor = FormatDescriptor.Create(repLen,
 								                                FormatDescriptor.Type.String, FormatDescriptor.SubType.None);
 								                            start += repLen;
 								                            continue;
 								                        }
 								                        minNextStart = Math.Min(minNextStart, range.High + 1);
 								                    } else {
 								                        Debug.Assert(range.Low > start);
 								                        minNextStart = Math.Min(minNextStart, range.Low);
 								                    }
 								                }
 								                found = mProject.HighAsciiBytes.GetContainingOrSubsequentRange(start,
 								                        out range);
 								                if (found) {
 								                    if (range.Low <= start) {
 								                        // found a matching range
 								                        Debug.Assert(range.Low <= start && range.High >= start);
 								                        int clampEnd = Math.Min(range.High, end);
 								                        int repLen = clampEnd - start + 1;
 								                        if (repLen >= minStringChars) {
 								                            LogV(start, "High ASCII string, len=" + repLen + " bytes");
 								                            mAnattribs[start].DataDescriptor = FormatDescriptor.Create(repLen,
 								                                FormatDescriptor.Type.String, FormatDescriptor.SubType.None);
 								                            start += repLen;
 								                            continue;
 								                        }
 								                        minNextStart = Math.Min(minNextStart, range.High + 1);
 								                    } else {
 								                        Debug.Assert(range.Low > start);
 								                        minNextStart = Math.Min(minNextStart, range.Low);
 								                    }
 								                }
 								                // Advance to the next possible run location.
 								                int nextStart = minNextStart > 0 ? minNextStart : start + 1;
 								                Debug.Assert(nextStart > start);
 								                // No runs found, output as single bytes.  This is the easiest form for users
 								                // to edit.
 								                while (start < nextStart) {
 								                    mAnattribs[start].DataDescriptor = oneByteDefault;
 								                    FormatDescriptor.DebugPrefabBump();
 								                    start++;
 								                }
 								            }
 								#else
-												Add multiple encoding support to uncategorized data analyzer

The code that searches for character strings in uncategorized data
now recognizes the C64 encodings when selected in the project
properties.

The new code avoids some redundant comparisons when runs of
printable characters are found.  I suspect the new implementation
loses on overall performance because we're now calling through
delegates instead of testing characters directly, but I haven't
tested for that.

											
										
										
											2019-08-13 21:08:27 +00:00
+								            // Select "is printable" test.  We use the extended version to include some
 								            // control characters.
-												Instruction operand editor rework, part 1

Rearrange the UI elements, and convert the code-behind to a more
XAML-style form.  The basic stuff works, but the old "shortcut"
system is still in the process of being replaced.

											
										
										
											2019-09-07 20:39:22 +00:00
+								            // TODO(maybe): require some *actually* printable characters in each string
-												Add multiple encoding support to uncategorized data analyzer

The code that searches for character strings in uncategorized data
now recognizes the C64 encodings when selected in the project
properties.

The new code avoids some redundant comparisons when runs of
printable characters are found.  I suspect the new implementation
loses on overall performance because we're now calling through
delegates instead of testing characters directly, but I haven't
tested for that.

											
										
										
											2019-08-13 21:08:27 +00:00
+								            CharEncoding.InclusionTest testPrintable;
 								            FormatDescriptor.SubType baseSubType;
 								            switch (mAnalysisParams.DefaultTextScanMode) {
 								                case TextScanMode.LowAscii:
 								                    testPrintable = CharEncoding.IsExtendedAscii;
 								                    baseSubType = FormatDescriptor.SubType.Ascii;
 								                    break;
 								                case TextScanMode.LowHighAscii:
 								                    testPrintable = CharEncoding.IsExtendedLowOrHighAscii;
 								                    baseSubType = FormatDescriptor.SubType.ASCII_GENERIC;
 								                    break;
 								                case TextScanMode.C64Petscii:
-												Replace on-screen string operand formatting

The previous functions just grabbed 62 characters and slapped quotes
on the ends, but that doesn't work if we want to show strings with
embedded control characters.  This change replaces the simple
formatter with the one used to generate assembly source code.  This
increases the cost of refreshing the display list, so a cache will
need to be added in a future change.

Converters for C64 PETSCII and C64 Screen Code have been defined.
The results of changing the auto-scan encoding can now be viewed.

The string operand formatter was using a single delimiter, but for
the on-screen version we want open-quote and close-quote, and might
want to identify some encodings with a prefix.  The formatter now
takes a class that defines the various parts.  (It might be worth
replacing the delimiter patterns recently added for single-character
operands with this, so we don't have two mechanisms for very nearly
the same thing.)

While working on this change I remembered why there were two kinds
of "reverse" in the old Merlin 32 string operand generator: what you
want for assembly code is different from what you want on screen.
The ReverseMode enum has been resurrected.

											
										
										
											2019-08-14 00:22:21 +00:00
+								                    testPrintable = CharEncoding.IsExtendedC64Petscii;
-												Add multiple encoding support to uncategorized data analyzer

The code that searches for character strings in uncategorized data
now recognizes the C64 encodings when selected in the project
properties.

The new code avoids some redundant comparisons when runs of
printable characters are found.  I suspect the new implementation
loses on overall performance because we're now calling through
delegates instead of testing characters directly, but I haven't
tested for that.

											
										
										
											2019-08-13 21:08:27 +00:00
+								                    baseSubType = FormatDescriptor.SubType.C64Petscii;
 								                    break;
 								                case TextScanMode.C64ScreenCode:
-												Replace on-screen string operand formatting

The previous functions just grabbed 62 characters and slapped quotes
on the ends, but that doesn't work if we want to show strings with
embedded control characters.  This change replaces the simple
formatter with the one used to generate assembly source code.  This
increases the cost of refreshing the display list, so a cache will
need to be added in a future change.

Converters for C64 PETSCII and C64 Screen Code have been defined.
The results of changing the auto-scan encoding can now be viewed.

The string operand formatter was using a single delimiter, but for
the on-screen version we want open-quote and close-quote, and might
want to identify some encodings with a prefix.  The formatter now
takes a class that defines the various parts.  (It might be worth
replacing the delimiter patterns recently added for single-character
operands with this, so we don't have two mechanisms for very nearly
the same thing.)

While working on this change I remembered why there were two kinds
of "reverse" in the old Merlin 32 string operand generator: what you
want for assembly code is different from what you want on screen.
The ReverseMode enum has been resurrected.

											
										
										
											2019-08-14 00:22:21 +00:00
+								                    testPrintable = CharEncoding.IsExtendedC64ScreenCode;
-												Add multiple encoding support to uncategorized data analyzer

The code that searches for character strings in uncategorized data
now recognizes the C64 encodings when selected in the project
properties.

The new code avoids some redundant comparisons when runs of
printable characters are found.  I suspect the new implementation
loses on overall performance because we're now calling through
delegates instead of testing characters directly, but I haven't
tested for that.

											
										
										
											2019-08-13 21:08:27 +00:00
+								                    baseSubType = FormatDescriptor.SubType.C64Screen;
 								                    break;
 								                default:
 								                    Debug.Assert(false);
 								                    testPrintable = CharEncoding.IsExtendedLowOrHighAscii;
 								                    baseSubType = FormatDescriptor.SubType.ASCII_GENERIC;
 								                    break;
 								            }
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								            while (start <= end) {
 								                // Check for block of repeated values.
 								                int runLen = RecognizeRun(mFileData, start, end);
-												Add multiple encoding support to uncategorized data analyzer

The code that searches for character strings in uncategorized data
now recognizes the C64 encodings when selected in the project
properties.

The new code avoids some redundant comparisons when runs of
printable characters are found.  I suspect the new implementation
loses on overall performance because we're now calling through
delegates instead of testing characters directly, but I haven't
tested for that.

											
										
										
											2019-08-13 21:08:27 +00:00
+								                int printLen = 0;
 								                FormatDescriptor.SubType subType = baseSubType;
 								                if (testPrintable(mFileData[start])) {
 								                    // The run byte is printable, and the run is shorter than a line.  It's
 								                    // possible the run is followed by additional printable characters, e.g.
 								                    // "*****hello".  Text is easier for humans to understand, so we prefer
 								                    // that unless the run is longer than one line.
 								                    if (runLen <= MAX_STRING_RUN_LENGTH) {
 								                        // See if the run is followed by additional printable characters.
 								                        printLen = runLen;
 								                        // For LowHighAscii we allow a string to be either low or high, but it
 								                        // must be entirely one thing.  Refine our test.
 								                        CharEncoding.InclusionTest refinedTest = testPrintable;
 								                        if (mAnalysisParams.DefaultTextScanMode == TextScanMode.LowHighAscii) {
 								                            if (CharEncoding.IsExtendedAscii(mFileData[start])) {
 								                                refinedTest = CharEncoding.IsExtendedAscii;
 								                                subType = FormatDescriptor.SubType.Ascii;
 								                            } else {
 								                                refinedTest = CharEncoding.IsExtendedHighAscii;
 								                                subType = FormatDescriptor.SubType.HighAscii;
 								                            }
 								                        }
 								                        for (int i = start + runLen; i <= end; i++) {
 								                            if (!refinedTest(mFileData[i])) {
 								                                break;
 								                            }
 								                            printLen++;
 								                        }
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                    }
 								                }
-												Add multiple encoding support to uncategorized data analyzer

The code that searches for character strings in uncategorized data
now recognizes the C64 encodings when selected in the project
properties.

The new code avoids some redundant comparisons when runs of
printable characters are found.  I suspect the new implementation
loses on overall performance because we're now calling through
delegates instead of testing characters directly, but I haven't
tested for that.

											
										
										
											2019-08-13 21:08:27 +00:00
+								                if (printLen >= minStringChars) {
 								                    // This is either a short run followed by printable characters, or just a
 								                    // (possibly very large) bunch of printable characters.
 								                    Debug.Assert(subType != FormatDescriptor.SubType.ASCII_GENERIC);
 								                    LogD(start, "Character string (" + subType + "), len=" + printLen + " bytes");
 								                    mAnattribs[start].DataDescriptor = FormatDescriptor.Create(printLen,
 								                        FormatDescriptor.Type.StringGeneric, subType);
 								                    start += printLen;
 								                } else if (runLen >= MIN_RUN_LENGTH) {
 								                    // Didn't qualify as a string, but it's long enough to be a run.
 								                    //
 								                    // TODO(someday): allow .fill pseudo-ops to have character encoding
 								                    //   sub-types, so we can ".fill 64,'*'".  Easy to do here, but
 								                    //   proper treatment requires tweaking data operand editor to allow
 								                    //   char encoding to be specified.
 								                    LogV(start, "Run of 0x" + mFileData[start].ToString("x2") + ": " +
 								                        runLen + " bytes");
 								                    mAnattribs[start].DataDescriptor = FormatDescriptor.Create(
 								                        runLen, FormatDescriptor.Type.Fill,
 								                        FormatDescriptor.SubType.None);
 								                    start += runLen;
 								                } else {
 								                    // Nothing useful found, output 1+ values as single bytes.  This is the
 								                    // easiest form for users to edit.  If we found a run, but it was too short,
 								                    // we can go ahead and mark all bytes in the run because we know the later
 								                    // matches will also be too short.
 								                    Debug.Assert(runLen > 0);
 								                    while (runLen-- != 0) {
 								                        mAnattribs[start++].DataDescriptor = oneByteDefault;
 								                        FormatDescriptor.DebugPrefabBump();
 								                    }
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                }
 								            }
 								#endif
-												Add multiple encoding support to uncategorized data analyzer

The code that searches for character strings in uncategorized data
now recognizes the C64 encodings when selected in the project
properties.

The new code avoids some redundant comparisons when runs of
printable characters are found.  I suspect the new implementation
loses on overall performance because we're now calling through
delegates instead of testing characters directly, but I haven't
tested for that.

											
										
										
											2019-08-13 21:08:27 +00:00
+								        }
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
-												Add C64 encodings to instruction and data operand editors

Both dialogs got a couple extra radio buttons for selection of
single character operands.  The data operand editor got a combo box
that lets you specify how it scans for viable strings.

Various string scanning methods were made more generic.  This got a
little strange with auto-detection of low/high ASCII, but that was
mostly a matter of keeping the previous code around as a special
case.

Made C64 Screen Code DCI strings a thing that works.

											
										
										
											2019-08-16 00:53:12 +00:00
+								        #region Static analyzer methods
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
 								        /// <summary>
 								        /// Measures the run of identical bytes that begins at "start".  The byte at
 								        /// "start" always counts, so the result is at least 1.
 								        /// </summary>
 								        /// <param name="fileData">Raw data.</param>
 								        /// <param name="start">Offset of first byte in range.</param>
 								        /// <param name="end">Offset of last byte in range (inclusive).</param>
 								        /// <returns>Number of consecutive bytes, counted from "start", that have the
 								        ///   same value as the byte at "start".</returns>
 								        public static int RecognizeRun(byte[] fileData, int start, int end) {
 								            byte runValue = fileData[start];
 								            int runLen = 1;
 								            // Extend the run until we hit a different value or pass the end of the range.
 								            for (int posn = start + 1; posn <= end && fileData[posn] == runValue; posn++) {
 								                runLen++;
 								            }
 								            return runLen;
 								        }
 								        /// <summary>
 								        /// Counts the number of low-ASCII, high-ASCII, and non-ASCII values in the
 								        /// specified region.
 								        /// </summary>
 								        /// <param name="fileData">Raw data.</param>
 								        /// <param name="start">Offset of first byte in range.</param>
 								        /// <param name="end">Offset of last byte in range.</param>
-												Add C64 encodings to instruction and data operand editors

Both dialogs got a couple extra radio buttons for selection of
single character operands.  The data operand editor got a combo box
that lets you specify how it scans for viable strings.

Various string scanning methods were made more generic.  This got a
little strange with auto-detection of low/high ASCII, but that was
mostly a matter of keeping the previous code around as a special
case.

Made C64 Screen Code DCI strings a thing that works.

											
										
										
											2019-08-16 00:53:12 +00:00
+								        /// <param name="charTest">Character test delegate.  Must match on both high and
 								        ///   low characters.</param>
 								        /// <param name="lowVal">Set to the number of low-range characters found.</param>
 								        /// <param name="highVal">Set to the number of high-range characters found.</param>
 								        /// <param name="nonChar">Set to the number of non-character bytes found.</param>
 								        public static void CountHighLowBytes(byte[] fileData, int start, int end,
 								                CharEncoding.InclusionTest charTest,
 								                out int lowVal, out int highVal, out int nonChar) {
 								            lowVal = highVal = nonChar = 0;
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
 								            for (int i = start; i <= end; i++) {
 								                byte val = fileData[i];
-												Add C64 encodings to instruction and data operand editors

Both dialogs got a couple extra radio buttons for selection of
single character operands.  The data operand editor got a combo box
that lets you specify how it scans for viable strings.

Various string scanning methods were made more generic.  This got a
little strange with auto-detection of low/high ASCII, but that was
mostly a matter of keeping the previous code around as a special
case.

Made C64 Screen Code DCI strings a thing that works.

											
										
										
											2019-08-16 00:53:12 +00:00
+								                if (!charTest(val)) {
 								                    nonChar++;
 								                } else if ((val & 0x80) == 0) {
 								                    lowVal++;
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                } else {
-												Add C64 encodings to instruction and data operand editors

Both dialogs got a couple extra radio buttons for selection of
single character operands.  The data operand editor got a combo box
that lets you specify how it scans for viable strings.

Various string scanning methods were made more generic.  This got a
little strange with auto-detection of low/high ASCII, but that was
mostly a matter of keeping the previous code around as a special
case.

Made C64 Screen Code DCI strings a thing that works.

											
										
										
											2019-08-16 00:53:12 +00:00
+								                    highVal++;
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                }
 								            }
 								        }
-												Add C64 encodings to instruction and data operand editors

Both dialogs got a couple extra radio buttons for selection of
single character operands.  The data operand editor got a combo box
that lets you specify how it scans for viable strings.

Various string scanning methods were made more generic.  This got a
little strange with auto-detection of low/high ASCII, but that was
mostly a matter of keeping the previous code around as a special
case.

Made C64 Screen Code DCI strings a thing that works.

											
										
										
											2019-08-16 00:53:12 +00:00
+								        /// <summary>
 								        /// Counts the number of bytes that match the character test.
 								        /// </summary>
 								        /// <param name="fileData">Raw data.</param>
 								        /// <param name="start">Offset of first byte in range.</param>
 								        /// <param name="end">Offset of last byte in range.</param>
 								        /// <param name="charTest">Character test delegate.</param>
 								        /// <returns>Number of matching characters.</returns>
 								        public static int CountCharacterBytes(byte[] fileData, int start, int end,
 								                CharEncoding.InclusionTest charTest) {
 								            // Walk the inclusive range [start, end], tallying every byte the
 								            // delegate accepts.
 								            int matches = 0;
 								            int offset = start;
 								            while (offset <= end) {
 								                byte val = fileData[offset];
 								                matches += charTest(val) ? 1 : 0;
 								                offset++;
 								            }
 								            return matches;
 								        }
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								        /// <summary>
 								        /// Counts the number of null-terminated strings in the buffer.
 								        ///
 								        /// Zero-length strings are allowed but not included in the count.
 								        /// </summary>
 								        /// <param name="fileData">Raw data.</param>
 								        /// <param name="start">Offset of first byte in range.</param>
 								        /// <param name="end">Offset of last byte in range.</param>
-												Add C64 encodings to instruction and data operand editors

Both dialogs got a couple extra radio buttons for selection of
single character operands.  The data operand editor got a combo box
that lets you specify how it scans for viable strings.

Various string scanning methods were made more generic.  This got a
little strange with auto-detection of low/high ASCII, but that was
mostly a matter of keeping the previous code around as a special
case.

Made C64 Screen Code DCI strings a thing that works.

											
										
										
											2019-08-16 00:53:12 +00:00
+								        /// <param name="charTest">Character test delegate.</param>
 								        /// <param name="limitHiBit">If set, the high bit in all characters must be the
 								        ///   same.  Used to enforce a single encoding when "low or high ASCII" is used.</param>
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								        /// <returns>Number of strings found, or -1 if bad data identified.</returns>
-												Add C64 encodings to instruction and data operand editors

Both dialogs got a couple extra radio buttons for selection of
single character operands.  The data operand editor got a combo box
that lets you specify how it scans for viable strings.

Various string scanning methods were made more generic.  This got a
little strange with auto-detection of low/high ASCII, but that was
mostly a matter of keeping the previous code around as a special
case.

Made C64 Screen Code DCI strings a thing that works.

											
										
										
											2019-08-16 00:53:12 +00:00
+								        public static int RecognizeNullTerminatedStrings(byte[] fileData, int start, int end,
 								                CharEncoding.InclusionTest charTest, bool limitHiBit) {
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								            // Quick test.
 								            if (fileData[end] != 0x00) {
 								                return -1;
 								            }
 								            int stringCount = 0;
 								            int expectedHiBit = -1;
 								            int stringLen = 0;
 								            for (int i = start; i <= end; i++) {
 								                byte val = fileData[i];
 								                if (val == 0x00) {
 								                    // End of string.  Only update count if string wasn't empty.
 								                    if (stringLen != 0) {
 								                        stringCount++;
 								                    }
 								                    stringLen = 0;
 								                    expectedHiBit = -1;
 								                } else {
-												Add C64 encodings to instruction and data operand editors

Both dialogs got a couple extra radio buttons for selection of
single character operands.  The data operand editor got a combo box
that lets you specify how it scans for viable strings.

Various string scanning methods were made more generic.  This got a
little strange with auto-detection of low/high ASCII, but that was
mostly a matter of keeping the previous code around as a special
case.

Made C64 Screen Code DCI strings a thing that works.

											
										
										
											2019-08-16 00:53:12 +00:00
+								                    if (limitHiBit) {
 								                        if (expectedHiBit == -1) {
 								                            // First byte in string, set hi/lo expectation.
 								                            expectedHiBit = val & 0x80;
 								                        } else if ((val & 0x80) != expectedHiBit) {
 								                            // Mixed ASCII or non-ASCII, fail.
 								                            return -1;
 								                        }
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                    }
-												Don't reject strings with "invalid" characters

When formatting one or more strings with the Edit Data Operand dialog,
the code must determine which options to present.  If the selected
bytes appear to represent one or more null-terminated strings, that
option is enabled in the UI.

The "format recognizers" enforce some strict rules, e.g. null-
terminated strings must end in $00, and also try to confirm that the
data looks like a printable string.  The algorithm rejects strings
with "illegal" characters in them.  This is simpler on some systems
than others.  For example, C64 PETSCII defines quite a few control
characters in ways that make them useful for embedding in printable
strings.

The "recognizers" are only used by the operand edit feature, not as
part of an automated string detector, so there's no real upside in
overriding the user's desire to form a string with arbitrary bytes.

This removes the quick rejection from the four recognizers (null-term,
len8, len16, dci).  It does not alter the high-level code, which
still insists on a certain percentage of the string being printable;
that may be worth revisiting as well.

(issue #100)

											
										
										
											2021-08-02 00:36:05 +00:00
+								                    //if (!charTest(val)) {
 								                    //    // Not a matching character, fail.
 								                    //    return -1;
 								                    //}
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                    stringLen++;
 								                }
 								            }
 								            return stringCount;
 								        }
 								        /// <summary>
 								        /// Counts strings prefixed with an 8-bit length.
 								        ///
 								        /// Zero-length strings are allowed but not counted.
 								        /// </summary>
 								        /// <param name="fileData">Raw data.</param>
 								        /// <param name="start">Offset of first byte in range.</param>
 								        /// <param name="end">Offset of last byte in range.</param>
-												Add C64 encodings to instruction and data operand editors

Both dialogs got a couple extra radio buttons for selection of
single character operands.  The data operand editor got a combo box
that lets you specify how it scans for viable strings.

Various string scanning methods were made more generic.  This got a
little strange with auto-detection of low/high ASCII, but that was
mostly a matter of keeping the previous code around as a special
case.

Made C64 Screen Code DCI strings a thing that works.

											
										
										
											2019-08-16 00:53:12 +00:00
+								        /// <param name="charTest">Character test delegate.</param>
 								        /// <param name="limitHiBit">If set, the high bit in all characters must be the
 								        ///   same.  Used to enforce a single encoding when "low or high ASCII" is used.</param>
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								        /// <returns>Number of strings found, or -1 if bad data identified.</returns>
-												Add C64 encodings to instruction and data operand editors

Both dialogs got a couple extra radio buttons for selection of
single character operands.  The data operand editor got a combo box
that lets you specify how it scans for viable strings.

Various string scanning methods were made more generic.  This got a
little strange with auto-detection of low/high ASCII, but that was
mostly a matter of keeping the previous code around as a special
case.

Made C64 Screen Code DCI strings a thing that works.

											
										
										
											2019-08-16 00:53:12 +00:00
+								        public static int RecognizeLen8Strings(byte[] fileData, int start, int end,
 								                CharEncoding.InclusionTest charTest, bool limitHiBit) {
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								            int posn = start;
 								            int remaining = end - start + 1;
 								            int stringCount = 0;
 								            while (remaining > 0) {
 								                int strLen = fileData[posn++];
 								                if (strLen > --remaining) {
 								                    // Buffer doesn't hold entire string, fail.
 								                    return -1;
 								                }
 								                if (strLen == 0) {
 								                    continue;
 								                }
 								                stringCount++;
 								                remaining -= strLen;
 								                int expectedHiBit = fileData[posn] & 0x80;
 								                while (strLen-- != 0) {
 								                    byte val = fileData[posn++];
-												Add C64 encodings to instruction and data operand editors

Both dialogs got a couple extra radio buttons for selection of
single character operands.  The data operand editor got a combo box
that lets you specify how it scans for viable strings.

Various string scanning methods were made more generic.  This got a
little strange with auto-detection of low/high ASCII, but that was
mostly a matter of keeping the previous code around as a special
case.

Made C64 Screen Code DCI strings a thing that works.

											
										
										
											2019-08-16 00:53:12 +00:00
+								                    if (limitHiBit && (val & 0x80) != expectedHiBit) {
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                        // Mixed ASCII, fail.
 								                        return -1;
 								                    }
-												Don't reject strings with "invalid" characters

When formatting one or more strings with the Edit Data Operand dialog,
the code must determine which options to present.  If the selected
bytes appear to represent one or more null-terminated strings, that
option is enabled in the UI.

The "format recognizers" enforce some strict rules, e.g. null-
terminated strings must end in $00, and also try to confirm that the
data looks like a printable string.  The algorithm rejects strings
with "illegal" characters in them.  This is simpler on some systems
than others.  For example, C64 PETSCII defines quite a few control
characters in ways that make them useful for embedding in printable
strings.

The "recognizers" are only used by the operand edit feature, not as
part of an automated string detector, so there's no real upside in
overriding the user's desire to form a string with arbitrary bytes.

This removes the quick rejection from the four recognizers (null-term,
len8, len16, dci).  It does not alter the high-level code, which
still insists on a certain percentage of the string being printable;
that may be worth revisiting as well.

(issue #100)

											
										
										
											2021-08-02 00:36:05 +00:00
+								                    //if (!charTest(val)) {
 								                    //    // Not a matching character, fail.
 								                    //    return -1;
 								                    //}
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								                }
 								            }
 								            return stringCount;
 								        }
 								        /// <summary>
 								        /// Counts strings prefixed with a 16-bit length.
 								        ///
 								        /// Zero-length strings are allowed but not counted.
 								        /// </summary>
 								        /// <remarks>
 								        /// The length is stored in two bytes, low byte first.  The scan fails if the range
 								        /// ends in the middle of a length word or in the middle of a string.
 								        ///
 								        /// Individual characters are not screened.  The recognizers are only used by the
 								        /// operand edit feature, not by an automated string detector, so strings holding
 								        /// "invalid" characters (e.g. embedded control codes) are accepted (issue #100).
 								        /// </remarks>
 								        /// <param name="fileData">Raw data.</param>
 								        /// <param name="start">Offset of first byte in range.</param>
 								        /// <param name="end">Offset of last byte in range.</param>
 								        /// <param name="charTest">Character test delegate.  Not currently consulted; see
 								        ///   remarks.</param>
 								        /// <param name="limitHiBit">If set, the high bit of every character in a string must
 								        ///   match the high bit of that string's first character.  Used to enforce a single
 								        ///   encoding when "low or high ASCII" is used.</param>
 								        /// <returns>Number of strings found, or -1 if bad data identified.</returns>
 								        public static int RecognizeLen16Strings(byte[] fileData, int start, int end,
 								                CharEncoding.InclusionTest charTest, bool limitHiBit) {
 								            int posn = start;
 								            int remaining = end - start + 1;
 								            int stringCount = 0;
 								            while (remaining > 0) {
 								                if (remaining < 2) {
 								                    // Not enough bytes for length, fail.
 								                    return -1;
 								                }
 								                // Little-endian 16-bit length.
 								                int strLen = fileData[posn++];
 								                strLen |= fileData[posn++] << 8;
 								                remaining -= 2;
 								                if (strLen > remaining) {
 								                    // Buffer doesn't hold entire string, fail.
 								                    return -1;
 								                }
 								                if (strLen == 0) {
 								                    // Empty string: legal, but doesn't add to the count.
 								                    continue;
 								                }
 								                stringCount++;
 								                remaining -= strLen;
 								                // The first character sets the high-bit polarity for this string.
 								                int expectedHiBit = fileData[posn] & 0x80;
 								                while (strLen-- != 0) {
 								                    byte val = fileData[posn++];
 								                    if (limitHiBit && (val & 0x80) != expectedHiBit) {
 								                        // Mixed ASCII, fail.
 								                        return -1;
 								                    }
 								                    // charTest(val) is deliberately not applied here; rejecting
 								                    // non-matching characters was removed for issue #100.
 								                }
 								            }
 								            return stringCount;
 								        }
 								        /// <summary>
 								        /// Counts strings in Dextral Character Inverted format, meaning the high bit on the
 								        /// last byte is the opposite of the preceding.
 								        ///
 								        /// To reduce false-positives, we require that all strings have the same hi/lo pattern.
 								        ///
 								        /// Single-character strings are allowed for C64 PETSCII, which doesn't have an
 								        /// equivalent to "high ASCII" character formatting, so long as the terminating
 								        /// character value has its high bit set.  Without this restriction, any collection
 								        /// of characters is just a list of DCI strings, which is a weird thing to offer up
 								        /// in the UI.
 								        /// </summary>
 								        /// <remarks>
 								        /// For C64Petscii, this will identify strings that are entirely in lower case except
 								        /// for the last letteR, or vice-versa.
 								        ///
 								        /// The terminator polarity is taken from the last byte in the range, so the final
 								        /// string is always terminated.  Individual characters are not screened against
 								        /// the character test (issue #100).
 								        /// </remarks>
 								        /// <param name="fileData">Raw data.</param>
 								        /// <param name="start">Offset of first byte in range.</param>
 								        /// <param name="end">Offset of last byte in range.</param>
 								        /// <param name="charTest">Character test delegate.  Only used to decide whether a
 								        ///   region made up entirely of single-character strings should be rejected.</param>
 								        /// <returns>Number of strings found, or -1 if bad data identified.</returns>
 								        public static int RecognizeDciStrings(byte[] fileData, int start, int end,
 								                CharEncoding.InclusionTest charTest) {
 								            // The last byte must be a terminator, so its high bit defines the
 								            // end-marker polarity for every string in the range.
 								            int endHiBit = fileData[end] & 0x80;
 								            int stringCount = 0;
 								            int stringLen = 0;      // non-terminator bytes seen in the current string
 								            for (int i = start; i <= end; i++) {
 								                byte val = fileData[i];
 								                if ((val & 0x80) == endHiBit) {
 								                    // end of string
 								                    if (stringLen == 0) {
 								                        // Terminator with nothing before it: either the first byte of
 								                        // the range, or two consecutive bytes with end-marker polarity.
 								                        // Allow if the end char high bit is set.  Otherwise it's just a
 								                        // sequence of regular characters.
 								                        if (endHiBit == 0) {
 								                            return -1;
 								                        }
 								                    }
 								                    stringCount++;
 								                    stringLen = 0;
 								                } else {
 								                    stringLen++;
 								                }
 								                // charTest((byte)(val & 0x7f)) is deliberately not applied here;
 								                // rejecting non-matching characters was removed for issue #100.
 								            }
 								            // NOTE(review): presumably 0x5c passes only the ASCII inclusion tests --
 								            // confirm against CharEncoding.
 								            bool isAscii = charTest(0x5c);      // temporary hack
 								            if (isAscii && stringCount == end - start + 1) {
 								                // Entire region is single-character strings.  Don't allow for ASCII or
 								                // high ASCII.
 								                return -1;
 								            }
 								            return stringCount;
 								        }
-												Don't reject strings with "invalid" characters

When formatting one or more strings with the Edit Data Operand dialog,
the code must determine which options to present.  If the selected
bytes appear to represent one or more null-terminated strings, that
option is enabled in the UI.

The "format recognizers" enforce some strict rules, e.g. null-
terminated strings must end in $00, and also try to confirm that the
data looks like a printable string.  The algorithm rejects strings
with "illegal" characters in them.  This is simpler on some systems
than others.  For example, C64 PETSCII defines quite a few control
characters in ways that make them useful for embedding in printable
strings.

The "recognizers" are only used by the operand edit feature, not as
part of an automated string detector, so there's no real upside in
overriding the user's desire to form a string with arbitrary bytes.

This removes the quick rejection from the four recognizers (null-term,
len8, len16, dci).  It does not alter the high-level code, which
still insists on a certain percentage of the string being printable;
that may be worth revisiting as well.

(issue #100)

											
										
										
											2021-08-02 00:36:05 +00:00
+								#if false
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								        /// <summary>
 								        /// Counts strings in reverse Dextral Character Inverted format, meaning the string is
 								        /// stored in reverse order in memory, and the high bit on the first (last) byte is
 								        /// the opposite of the rest.
 								        ///
 								        /// Each string must be at least two bytes.  To reduce false-positives, we require
 								        /// that all strings have the same hi/lo pattern.
 								        /// </summary>
 								        /// <param name="fileData">Raw data.</param>
 								        /// <param name="start">Offset of first byte in range.</param>
 								        /// <param name="end">Offset of last byte in range.</param>
 								        /// <returns>Number of strings found, or -1 if bad data identified.</returns>
 								        public static int RecognizeReverseDciStrings(byte[] fileData, int start, int end) {
 								            // The scan walks from the highest offset down, so the byte at "end" is an
 								            // ordinary (non-marker) character and fixes the polarity for the whole range.
 								            int bodyHiBit = fileData[end] & 0x80;
 								            if ((fileData[start] & 0x80) == bodyHiBit) {
 								                // The byte at the lowest offset must carry the flipped high bit.
 								                return -1;
 								            }
 								            int found = 0;
 								            int pendingChars = 0;
 								            int offset = end;
 								            while (offset >= start) {
 								                int raw = fileData[offset];
 								                offset--;
 								                bool isEndMarker = (raw & 0x80) != bodyHiBit;
 								                if (!isEndMarker) {
 								                    // Ordinary character; it belongs to the string being accumulated.
 								                    pendingChars++;
 								                } else if (pendingChars == 0) {
 								                    // Two marker bytes in a row would make a one-byte string... fail.
 								                    return -1;
 								                } else {
 								                    // Marker byte closes out the current string.
 								                    found++;
 								                    pendingChars = 0;
 								                }
 								                // With the high bit stripped, the value must not be a control
 								                // character (below 0x20) or DEL (0x7f).
 								                int low7 = raw & 0x7f;
 								                if (low7 < 0x20 || low7 == 0x7f) {
 								                    return -1;
 								                }
 								            }
 								            return found;
 								        }
-												Don't reject strings with "invalid" characters

When formatting one or more strings with the Edit Data Operand dialog,
the code must determine which options to present.  If the selected
bytes appear to represent one or more null-terminated strings, that
option is enabled in the UI.

The "format recognizers" enforce some strict rules, e.g. null-
terminated strings must end in $00, and also try to confirm that the
data looks like a printable string.  The algorithm rejects strings
with "illegal" characters in them.  This is simpler on some systems
than others.  For example, C64 PETSCII defines quite a few control
characters in ways that make them useful for embedding in printable
strings.

The "recognizers" are only used by the operand edit feature, not as
part of an automated string detector, so there's no real upside in
overriding the user's desire to form a string with arbitrary bytes.

This removes the quick rejection from the four recognizers (null-term,
len8, len16, dci).  It does not alter the high-level code, which
still insists on a certain percentage of the string being printable;
that may be worth revisiting as well.

(issue #100)

											
										
										
											2021-08-02 00:36:05 +00:00
+								#endif
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
-												Check formatted string structure at load time

If we have a bug, or somebody edits the project file manually, we
can end up with a very wrong string, such as a null-terminated
string that isn't, or a DCI string that has a mix of high and low
ASCII from start to finish.  We now check all incoming strings for
validity, and discard any that fail the test.  The verification
code is shared with the extension script inline data formatter.

Also, added a comment to an F8-ROM symbol I stumbled over.

											
										
										
											2019-10-07 00:07:07 +00:00
+								        /// <summary>
 								        /// Verifies that the string data is what is expected.  Does not attempt to check
 								        /// the character encoding, just the structure.
 								        /// </summary>
 								        /// <param name="fileData">Raw data.</param>
 								        /// <param name="offset">Start offset of string.</param>
 								        /// <param name="length">Length of string, including leading length and terminating
 								        ///   null bytes.</param>
 								        /// <param name="type">Expected string type.</param>
 								        /// <param name="failMsg">Detailed failure message.</param>
 								        /// <returns>True if all is well.</returns>
 								        public static bool VerifyStringData(byte[] fileData, int offset, int length,
 								                FormatDescriptor.Type type, out string failMsg) {
 								            // Start with an empty message; it is only replaced on a failure path.
 								            failMsg = string.Empty;
 								            switch (type) {
 								                case FormatDescriptor.Type.StringGeneric:
 								                case FormatDescriptor.Type.StringReverse:
 								                    // No structural requirements are checked for these types.
 								                    return true;
 								                case FormatDescriptor.Type.StringNullTerm:
 								                    // must end in null byte, and have no null bytes before the end
 								                    int chk = offset;
 								                    // The post-decrement means that, inside the loop, "length" is the
 								                    // number of bytes remaining after the one being examined.
 								                    while (length-- != 0) {
 								                        byte val = fileData[chk++];
 								                        if (val == 0x00) {
 								                            if (length != 0) {
 								                                failMsg = Res.Strings.STR_VFY_NULL_INSIDE_NULL_TERM;
 								                                return false;
 								                            } else {
 								                                return true;
 								                            }
 								                        }
 								                    }
 								                    // Ran through every byte without finding a null.
 								                    failMsg = Res.Strings.STR_VFY_MISSING_NULL_TERM;
 								                    return false;
 								                case FormatDescriptor.Type.StringL8:
 								                    // The leading byte must equal the count of bytes that follow it.
 								                    if (fileData[offset] != length - 1) {
 								                        failMsg = Res.Strings.STR_VFY_L1_LENGTH_MISMATCH;
 								                        return false;
 								                    }
 								                    return true;
 								                case FormatDescriptor.Type.StringL16:
 								                    // The two-byte value at the start must equal the count of bytes that
 								                    // follow it.  NOTE(review): the final "false" argument presumably
 								                    // selects little-endian order -- confirm against RawData.GetWord.
 								                    int len = RawData.GetWord(fileData, offset, 2, false);
 								                    if (len != length - 2) {
 								                        failMsg = Res.Strings.STR_VFY_L2_LENGTH_MISMATCH;
 								                        return false;
 								                    }
 								                    return true;
 								                case FormatDescriptor.Type.StringDci:
-												Allow single-character DCI strings

The DCI string format uses character values where the high bit of the
last byte differs from the rest of the string.  Usually all the high
bits are clear except on the last byte, but SourceGen generally allows
either polarity.

This gets a little uncertain with single-character strings, because
SourceGen can't auto-detect DCI very effectively.  A series of bytes
with the high bit set could be a single high-ASCII string or a series
of single-byte DCI strings.

The motivation for allowing them is C64 PETSCII.  While ASCII allows
"high ASCII" as an escape hatch, PETSCII doesn't have that option, so
there's no way to mark the data as a character or a string.  We still
want to do a bit of screening, but if the user specifies a non-ASCII
character set and the selected bytes have their high bits set, we
want to just treat the whole set as 1-byte DCI.

Some minor adjustments were needed for a couple of validity checks
that expected longer strings.

This adds some short DCI strings in different character sets to the
char-encoding regression tests.

(for issue #102)

											
										
										
											2021-08-08 22:38:39 +00:00
+								                    //if (length < 2) {
 								                    //    failMsg = Res.Strings.STR_VFY_DCI_SHORT;
 								                    //    return false;
 								                    //}
-												Check formatted string structure at load time

If we have a bug, or somebody edits the project file manually, we
can end up with a very wrong string, such as a null-terminated
string that isn't, or a DCI string that has a mix of high and low
ASCII from start to finish.  We now check all incoming strings for
validity, and discard any that fail the test.  The verification
code is shared with the extension script inline data formatter.

Also, added a comment to an F8-ROM symbol I stumbled over.

											
										
										
											2019-10-07 00:07:07 +00:00
+								                    byte first = (byte)(fileData[offset] & 0x80);
 								                    // Every byte except the last must have the same high bit as the
 								                    // first byte.
 								                    for (int i = offset + 1; i < offset + length - 1; i++) {
 								                        if ((fileData[i] & 0x80) != first) {
 								                            failMsg = Res.Strings.STR_VFY_DCI_MIXED_DATA;
 								                            return false;
 								                        }
 								                    }
-												Allow single-character DCI strings

The DCI string format uses character values where the high bit of the
last byte differs from the rest of the string.  Usually all the high
bits are clear except on the last byte, but SourceGen generally allows
either polarity.

This gets a little uncertain with single-character strings, because
SourceGen can't auto-detect DCI very effectively.  A series of bytes
with the high bit set could be a single high-ASCII string or a series
of single-byte DCI strings.

The motivation for allowing them is C64 PETSCII.  While ASCII allows
"high ASCII" as an escape hatch, PETSCII doesn't have that option, so
there's no way to mark the data as a character or a string.  We still
want to do a bit of screening, but if the user specifies a non-ASCII
character set and the selected bytes have their high bits set, we
want to just treat the whole set as 1-byte DCI.

Some minor adjustments were needed for a couple of validity checks
that expected longer strings.

This adds some short DCI strings in different character sets to the
char-encoding regression tests.

(for issue #102)

											
										
										
											2021-08-08 22:38:39 +00:00
+								                    if (length > 1 && (fileData[offset + length - 1] & 0x80) == first) {
-												Check formatted string structure at load time

If we have a bug, or somebody edits the project file manually, we
can end up with a very wrong string, such as a null-terminated
string that isn't, or a DCI string that has a mix of high and low
ASCII from start to finish.  We now check all incoming strings for
validity, and discard any that fail the test.  The verification
code is shared with the extension script inline data formatter.

Also, added a comment to an F8-ROM symbol I stumbled over.

											
										
										
											2019-10-07 00:07:07 +00:00
+								                        failMsg = Res.Strings.STR_VFY_DCI_NOT_TERMINATED;
 								                        return false;
 								                    }
 								                    return true;
 								                default:
 								                    // Any other type is unexpected here: assert in debug builds, and
 								                    // report failure with failMsg left empty.
 								                    Debug.Assert(false);
 								                    return false;
 								            }
 								        }
-												Add C64 encodings to instruction and data operand editors

Both dialogs got a couple extra radio buttons for selection of
single character operands.  The data operand editor got a combo box
that lets you specify how it scans for viable strings.

Various string scanning methods were made more generic.  This got a
little strange with auto-detection of low/high ASCII, but that was
mostly a matter of keeping the previous code around as a special
case.

Made C64 Screen Code DCI strings a thing that works.

											
										
										
											2019-08-16 00:53:12 +00:00
+								        #endregion // Static analyzers
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								    }
 								}
-												Add multiple encoding support to uncategorized data analyzer

The code that searches for character strings in uncategorized data
now recognizes the C64 encodings when selected in the project
properties.

The new code avoids some redundant comparisons when runs of
printable characters are found.  I suspect the new implementation
loses on overall performance because we're now calling through
delegates instead of testing characters directly, but I haven't
tested for that.

											
										
										
											2019-08-13 21:08:27 +00:00
+								#if DATA_PRESCAN
-												Copy some non-UI code over

Mostly a straight copy & paste of the files.  The only significant
change was to move the localizable strings from Properties/Resources
(RESX) to Res/Strings.xaml (Resource Dictionary).  I expect a
number of strings will no longer be needed, since WPF lets you put
more of the UI/UX logic into the design side.

I also renamed the namespace to SourceGenWPF, and put the app icon
into the Res directory so it can be a resource rather than a loose
file.  I'm merging the "Setup" directory contents into the main app
since there wasn't a whole lot going on there.

The WPF Color class lacks conversions to/from a 32-bit integer, so
I added those.

None of the stuff is wired up yet.

											
										
										
											2019-05-02 22:45:40 +00:00
+								        /// <summary>
 								        /// Iterator that generates a list of offsets which are not known to hold code or data.
 								        ///
 								        /// Generates a set of integers in ascending order.
 								        /// </summary>
 								        private class UndeterminedValueIterator : IEnumerator {
 								            /// <summary>
 								            /// Position within the Anattrib array; negative until MoveNext is first called.
 								            /// </summary>
 								            private int mCurIndex;
 								            /// <summary>
 								            /// The Anattrib array being walked.
 								            /// </summary>
 								            private Anattrib[] mAnattribs;
 								            /// <summary>
 								            /// Constructor.  Captures the array reference and rewinds the iterator.
 								            /// </summary>
 								            public UndeterminedValueIterator(Anattrib[] anattribs) {
 								                mAnattribs = anattribs;
 								                Reset();
 								            }
 								            // IEnumerator: current element (boxed index), or null before the first MoveNext
 								            public object Current {
 								                get {
 								                    return (mCurIndex < 0) ? null : (object)mCurIndex;
 								                }
 								            }
 								            // IEnumerator: advance to the next uncategorized offset; false when none remain
 								            public bool MoveNext() {
 								                for (mCurIndex++; mCurIndex < mAnattribs.Length; mCurIndex++) {
 								                    Anattrib entry = mAnattribs[mCurIndex];
 								                    if (entry.IsInstructionStart) {
 								                        // Land on the instruction's last byte; the loop increment then
 								                        // steps past it.
 								                        mCurIndex += entry.Length - 1;
 								                        continue;
 								                    }
 								                    if (entry.IsUncategorized) {
 								                        // found a candidate
 								                        return true;
 								                    }
 								                }
 								                return false;
 								            }
 								            // IEnumerator: return to the not-started state
 								            public void Reset() {
 								                mCurIndex = -1;
 								            }
 								        }
 								#endif