2019-07-09 00:02:25 +00:00
|
|
|
|
/*
|
|
|
|
|
* Copyright 2019 faddenSoft
|
|
|
|
|
*
|
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
|
*
|
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
*
|
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
|
* limitations under the License.
|
|
|
|
|
*/
|
|
|
|
|
using System;
|
|
|
|
|
using System.Collections.Generic;
|
|
|
|
|
using System.ComponentModel;
|
|
|
|
|
using System.Diagnostics;
|
|
|
|
|
using System.Runtime.CompilerServices;
|
|
|
|
|
using System.Windows;
|
|
|
|
|
using System.Windows.Controls;
|
|
|
|
|
|
2019-08-16 00:53:12 +00:00
|
|
|
|
using Asm65;
|
2019-07-09 00:02:25 +00:00
|
|
|
|
using CommonUtil;
|
2019-08-16 00:53:12 +00:00
|
|
|
|
using TextScanMode = SourceGen.ProjectProperties.AnalysisParameters.TextScanMode;
|
2019-07-09 00:02:25 +00:00
|
|
|
|
|
2019-07-20 20:28:10 +00:00
|
|
|
|
namespace SourceGen.WpfGui {
|
2019-07-09 00:02:25 +00:00
|
|
|
|
/// <summary>
|
|
|
|
|
/// Data operand editor.
|
|
|
|
|
/// </summary>
|
|
|
|
|
public partial class EditDataOperand : Window, INotifyPropertyChanged {
|
|
|
|
|
/// <summary>
/// Formatting result set produced by the dialog.  The entries are not required to share
/// a single format; for example, the "mixed ASCII" mode is expressed as alternating
/// string and byte entries instead of as a dedicated "mixed ASCII" format type.
/// </summary>
public SortedList<int, FormatDescriptor> Results { get; private set; }
|
|
|
|
|
|
2019-07-11 20:56:16 +00:00
|
|
|
|
/// <summary>
/// True while the dialog input is valid.  The OK button's enabled state is driven by
/// this property, so every assignment raises a property-changed notification.
/// </summary>
public bool IsValid {
    get => mIsValid;
    set {
        mIsValid = value;
        OnPropertyChanged();
    }
}
private bool mIsValid;
|
2019-07-09 00:02:25 +00:00
|
|
|
|
|
|
|
|
|
/// <summary>
/// Selected offsets.  An otherwise contiguous range of offsets can be broken up
/// by user-specified labels and address discontinuities, so this needs to be
/// processed by range.
/// </summary>
private TypedRangeSet mSelection;

/// <summary>
/// FormatDescriptor from the first offset.  May be null if the offset doesn't
/// have a format descriptor specified.  This will be used to configure the
/// dialog controls if the format is suited to the selection.  The goal is to
/// make single-item editing work as expected.
/// </summary>
public FormatDescriptor mFirstFormatDescriptor;

/// <summary>
/// Raw file data.
/// </summary>
private byte[] mFileData;

/// <summary>
/// Symbol table to use when resolving symbolic values.
/// </summary>
private SymbolTable mSymbolTable;

/// <summary>
/// Formatter to use when displaying addresses and hex values.
/// </summary>
private Asm65.Formatter mFormatter;

/// <summary>
/// Set to true if, during the initial setup, the format defined by
/// <see cref="mFirstFormatDescriptor"/> was unavailable.
/// </summary>
private bool mPreferredFormatUnavailable;
|
|
|
|
|
|
2019-08-16 00:53:12 +00:00
|
|
|
|
/// <summary>
/// One entry in the text encoding combo box: a display name paired with a scan mode.
/// The mode uses the same TextScanMode enum as the uncategorized data analyzer.
/// </summary>
public class StringEncodingItem {
    /// <summary>Text associated with this entry.</summary>
    public string Name { get; }

    /// <summary>Scan mode associated with this entry.</summary>
    public TextScanMode Mode { get; }

    public StringEncodingItem(string name, TextScanMode mode) {
        Mode = mode;
        Name = name;
    }
}

/// <summary>
/// Set of text encoding entries; populated by the constructor.
/// </summary>
public StringEncodingItem[] StringEncodingItems { get; private set; }
|
|
|
|
|
|
2019-07-09 00:02:25 +00:00
|
|
|
|
// INotifyPropertyChanged implementation
public event PropertyChangedEventHandler PropertyChanged;

/// <summary>
/// Raises PropertyChanged.  When called without an argument from a property setter,
/// the compiler supplies the calling member's name.
/// </summary>
/// <param name="propertyName">Name of the property that changed.</param>
private void OnPropertyChanged([CallerMemberName] string propertyName = "") {
    // Take a snapshot so the null test and the invocation see the same delegate.
    PropertyChangedEventHandler handler = PropertyChanged;
    if (handler != null) {
        handler(this, new PropertyChangedEventArgs(propertyName));
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Constructor.  Initializes the window, stores the arguments for later use, and
/// builds the list of text encoding entries.
/// </summary>
/// <param name="owner">Parent window.</param>
/// <param name="fileData">Raw file data.</param>
/// <param name="symbolTable">Symbol table used when resolving symbolic values.</param>
/// <param name="formatter">Formatter used when displaying addresses and hex values.</param>
/// <param name="trs">Selected offsets.</param>
/// <param name="firstDesc">Format descriptor from the first offset; may be null.</param>
public EditDataOperand(Window owner, byte[] fileData, SymbolTable symbolTable,
        Asm65.Formatter formatter, TypedRangeSet trs, FormatDescriptor firstDesc) {
    InitializeComponent();
    Owner = owner;
    DataContext = this;

    mSelection = trs;
    mFirstFormatDescriptor = firstDesc;
    mFileData = fileData;
    mSymbolTable = symbolTable;
    mFormatter = formatter;

    StringEncodingItems = new[] {
        new StringEncodingItem(Res.Strings.SCAN_LOW_ASCII, TextScanMode.LowAscii),
        new StringEncodingItem(Res.Strings.SCAN_LOW_HIGH_ASCII, TextScanMode.LowHighAscii),
        new StringEncodingItem(Res.Strings.SCAN_C64_PETSCII, TextScanMode.C64Petscii),
        new StringEncodingItem(Res.Strings.SCAN_C64_SCREEN_CODE, TextScanMode.C64ScreenCode),
    };
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Handles the window's Loaded event: analyzes the selection, configures the controls
/// from the first format descriptor, and refreshes the dependent control state.
/// The elapsed setup time is written to the debug log.
/// </summary>
private void Window_Loaded(object sender, RoutedEventArgs e) {
    // Stopwatch is monotonic, so the reported duration can't be skewed by wall-clock
    // adjustments the way a DateTime.Now difference can.
    Stopwatch loadTimer = Stopwatch.StartNew();

    // Determine which of the various options is suitable for the selected offsets.
    // Disable any radio buttons that won't work.
    AnalyzeRanges();

    // AnalyzeStringRanges() is deliberately not called here.  It gets invoked a bit
    // later, from the combo box "selection changed" callback.

    // Configure the dialog from the FormatDescriptor, if one is available.
    Debug.WriteLine("First FD: " + mFirstFormatDescriptor);
    SetControlsFromDescriptor(mFirstFormatDescriptor);

    if (mPreferredFormatUnavailable) {
        // This can happen when e.g. a bunch of stuff is formatted as null-terminated
        // strings.  We don't recognize a lone zero as a string, but we allow it if
        // it's next to a bunch of others.  If you come back later and try to format
        // just that one byte, you end up here.
        // TODO(maybe): make it more obvious what's going on?
        Debug.WriteLine("NOTE: preferred format unavailable");
    }

    UpdateControls();

    Debug.WriteLine("EditData dialog load time: " +
        loadTimer.Elapsed.TotalMilliseconds + " ms");
}
|
|
|
|
|
|
2019-07-11 20:56:16 +00:00
|
|
|
|
/// <summary>
/// Handles the ContentRendered event.  If the "display as" group is enabled, the
/// symbol entry box gets the focus so the user can start typing a symbolic reference
/// immediately.
/// </summary>
private void Window_ContentRendered(object sender, EventArgs e) {
    if (!simpleDisplayAsGroupBox.IsEnabled) {
        return;
    }
    symbolEntryTextBox.Focus();
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Checked-event handler shared by all buttons in the Main group.  Refreshing the
/// controls enables/disables the style group and the low/high/bank radio group.
/// </summary>
private void MainGroup_CheckedChanged(object sender, EventArgs e) => UpdateControls();
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Checked-event handler shared by the radio buttons in the Display group box.
/// Refreshing the controls enables/disables the low/high/bank radio group.
/// </summary>
private void SimpleDisplay_CheckedChanged(object sender, EventArgs e) => UpdateControls();
|
|
|
|
|
|
2019-07-11 20:56:16 +00:00
|
|
|
|
/// <summary>
/// Handles text changes in the symbol entry box.
/// </summary>
private void SymbolEntryTextBox_TextChanged(object sender, TextChangedEventArgs e) {
    // Typing a symbol implies the user wants the Symbol option, so select it for them.
    // (The radio button is not asserted to be enabled here.)
    radioSimpleDataSymbolic.IsChecked = true;

    // Re-evaluate symbol validity, which drives the OK button.
    UpdateControls();
}
|
|
|
|
|
|
2019-08-16 00:53:12 +00:00
|
|
|
|
/// <summary>
/// Selects the string encoding combo box entry whose mode matches the argument.
/// When no entry matches, the entry at index 1 is selected instead.
/// </summary>
/// <param name="mode">Scan mode to look for.</param>
private void SetStringEncoding(TextScanMode mode) {
    StringEncodingItem match = Array.Find(StringEncodingItems, item => item.Mode == mode);
    stringEncodingComboBox.SelectedItem = match ?? StringEncodingItems[1];
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Handles a change of selection in the string encoding combo box: re-analyzes the
/// string options for the newly selected encoding, refreshes the controls, and
/// records the choice in the application settings.
/// </summary>
private void StringEncodingComboBox_SelectionChanged(object sender,
        SelectionChangedEventArgs e) {
    if (!IsLoaded) {
        return;
    }

    // SelectedItem is null when the selection has been cleared; there is no mode to
    // analyze in that case, so bail rather than dereference null.
    if (!(stringEncodingComboBox.SelectedItem is StringEncodingItem item)) {
        return;
    }

    AnalyzeStringRanges(item.Mode);
    UpdateControls();

    AppSettings.Global.SetEnum(AppSettings.OPED_DEFAULT_STRING_ENCODING,
        typeof(TextScanMode), (int)item.Mode);
}
|
|
|
|
|
|
2019-07-11 20:56:16 +00:00
|
|
|
|
/// <summary>
/// Handles a click on the OK button: builds the descriptor list from the current
/// control state, dumps it for debugging, and sets the dialog result to true.
/// </summary>
private void OkButton_Click(object sender, RoutedEventArgs e) {
    CreateDescriptorListFromControls();
    FormatDescriptor.DebugDumpSortedList(Results);

    DialogResult = true;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Brings the dependent controls in line with the current radio button and text box
/// state, then updates <see cref="IsValid"/>.  Does nothing until the window has
/// been loaded.
/// </summary>
private void UpdateControls() {
    if (!IsLoaded) {
        return;
    }

    // Work out the item width and byte order implied by the checked numeric button.
    // A width of -1 means none of the simple numeric formats is checked.
    int simpleWidth;
    bool isBigEndian = false;
    if (radioSingleBytes.IsChecked.GetValueOrDefault()) {
        simpleWidth = 1;
    } else if (radio16BitLittle.IsChecked.GetValueOrDefault()) {
        simpleWidth = 2;
    } else if (radio16BitBig.IsChecked.GetValueOrDefault()) {
        simpleWidth = 2;
        isBigEndian = true;
    } else if (radio24BitLittle.IsChecked.GetValueOrDefault()) {
        simpleWidth = 3;
    } else if (radio32BitLittle.IsChecked.GetValueOrDefault()) {
        simpleWidth = 4;
    } else {
        simpleWidth = -1;
    }

    // The simple data "display as" style box is only meaningful for numeric formats.
    bool wantStyle = (simpleWidth > 0);
    bool focusOnSymbol = wantStyle && !simpleDisplayAsGroupBox.IsEnabled;
    simpleDisplayAsGroupBox.IsEnabled = wantStyle;

    if (wantStyle) {
        // Because this covers multiple items in a data area, we allow the
        // "extended" set, which includes some control characters.
        bool asciiOk = IsCompatibleWithCharSet(simpleWidth, isBigEndian,
            CharEncoding.IsExtendedLowOrHighAscii);
        radioSimpleDataAscii.IsEnabled = asciiOk;

        bool petsciiOk = IsCompatibleWithCharSet(simpleWidth, isBigEndian,
            CharEncoding.IsExtendedC64Petscii);
        radioSimpleDataPetscii.IsEnabled = petsciiOk;

        bool screenCodeOk = IsCompatibleWithCharSet(simpleWidth, isBigEndian,
            CharEncoding.IsExtendedC64ScreenCode);
        radioSimpleDataScreenCode.IsEnabled = screenCodeOk;
    }

    // The symbol entry box follows the "display as" group.  This lets the user go
    // straight from "click 16-bit" to "enter symbol" without a separate "click symbol".
    // (The symbol part panel's enabled state is handled in XAML.)
    symbolEntryTextBox.IsEnabled = simpleDisplayAsGroupBox.IsEnabled;

    // If the group box was just enabled, move the focus to the symbol entry box.
    // This saves a click, though it's a bit aggressive when arrowing through the items.
    if (focusOnSymbol) {
        symbolEntryTextBox.Focus();
    }

    bool isOk = true;
    if (radioSimpleDataSymbolic.IsChecked.GetValueOrDefault()) {
        string label = symbolEntryTextBox.Text;
        if (!Asm65.Label.ValidateLabel(label)) {
            // Only the format is checked; references to non-existent labels are allowed.
            isOk = false;
        } else if (mSymbolTable.TryGetValue(label, out Symbol sym)) {
            // Discourage references to auto-labels.
            isOk = (sym.SymbolSource != Symbol.Source.Auto);
        }
    }
    IsValid = isOk;
}
|
|
|
|
|
|
2019-07-11 20:56:16 +00:00
|
|
|
|
#region Setup
|
|
|
|
|
|
2019-07-09 00:02:25 +00:00
|
|
|
|
/// <summary>
/// Examines the selection to decide which data formatting options can be offered.
/// Sets the selection summary label and disables the radio buttons for options that
/// don't fit every range.
///
/// Call this once, when the dialog is first loaded.
/// </summary>
private void AnalyzeRanges() {
    Debug.Assert(mSelection.Count != 0);

    // Summarize the selection for the user.
    string infoStr;
    if (mSelection.RangeCount != 1) {
        string multiFmt = (string)FindResource("str_MultiGroup");
        infoStr = string.Format(multiFmt, mSelection.Count, mSelection.RangeCount);
    } else if (mSelection.Count != 1) {
        string groupFmt = (string)FindResource("str_SingleGroup");
        infoStr = string.Format(groupFmt, mSelection.Count);
    } else {
        infoStr = (string)FindResource("str_SingleByte");
    }
    selectFormatLabel.Text = infoStr;

    // Test every range against each option's criteria.  An option that fails for any
    // one range is disabled for the whole selection.  Single-byte and dense are
    // never disabled here.
    IEnumerator<TypedRangeSet.TypedRange> iter = mSelection.RangeListIterator;
    while (iter.MoveNext()) {
        TypedRangeSet.TypedRange rng = iter.Current;
        Debug.WriteLine("Testing [" + rng.Low + ", " + rng.High + "]");

        int count = rng.High - rng.Low + 1;
        Debug.Assert(count > 0);

        // Multi-byte items need the range length to be a multiple of the item width.
        if (count % 2 != 0) {
            radio16BitLittle.IsEnabled = false;
            radio16BitBig.IsEnabled = false;
        }
        if (count % 4 != 0) {
            radio32BitLittle.IsEnabled = false;
        }
        if (count % 3 != 0) {
            radio24BitLittle.IsEnabled = false;
        }

        // Check for run of bytes (2 or more of the same thing).  Each region is tested
        // on its own, so different regions may hold different byte values so long as
        // every byte within a given region is the same.
        bool isRun = radioFill.IsEnabled && count > 1 &&
            DataAnalysis.RecognizeRun(mFileData, rng.Low, rng.High) == count;
        if (!isRun) {
            radioFill.IsEnabled = false;
        }
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Analyzes the selection to see which string formatting options are suitable.
/// Enables or disables the string radio buttons and updates their labels with the
/// character and string counts.  If the currently checked string option ends up
/// disabled, the selection is reset to the default format.
///
/// Call this when the character encoding selection changes.
/// </summary>
/// <param name="scanMode">Character encoding to test the data against.</param>
private void AnalyzeStringRanges(TextScanMode scanMode) {
    Debug.WriteLine("Analyzing string ranges");
    Debug.Assert(IsLoaded);

    // Totals across all ranges.  A value of -1 marks an option that failed.
    int mixedCharOkCount = 0;
    int mixedCharNotCount = 0;
    int nullTermStringCount = 0;
    int len8StringCount = 0;
    int len16StringCount = 0;
    int dciStringCount = 0;

    bool isLowHigh = (scanMode == TextScanMode.LowHighAscii);

    CharEncoding.InclusionTest charTest;
    switch (scanMode) {
        case TextScanMode.LowAscii:
            charTest = CharEncoding.IsExtendedAscii;
            break;
        case TextScanMode.LowHighAscii:
            charTest = CharEncoding.IsExtendedLowOrHighAscii;
            break;
        case TextScanMode.C64Petscii:
            charTest = CharEncoding.IsExtendedC64Petscii;
            break;
        case TextScanMode.C64ScreenCode:
            charTest = CharEncoding.IsExtendedC64ScreenCode;
            break;
        default:
            Debug.Assert(false);
            charTest = CharEncoding.IsExtendedAscii;
            break;
    }

    // Start with everything enabled, except that null-terminated strings are never
    // offered for C64 screen codes.  Options are then knocked out range by range.
    radioStringMixed.IsEnabled = true;
    radioStringMixedReverse.IsEnabled = true;
    radioStringNullTerm.IsEnabled = (scanMode != TextScanMode.C64ScreenCode);
    radioStringLen8.IsEnabled = true;
    radioStringLen16.IsEnabled = true;
    radioStringDci.IsEnabled = true;

    IEnumerator<TypedRangeSet.TypedRange> iter = mSelection.RangeListIterator;
    while (iter.MoveNext()) {
        TypedRangeSet.TypedRange rng = iter.Current;
        Debug.WriteLine("Testing [" + rng.Low + ", " + rng.High + "]");

        // See if there's enough string data to make it worthwhile.  We use an
        // arbitrary threshold of 2+ printable characters, and require twice as many
        // printable as non-printable.
        if (radioStringMixed.IsEnabled) {
            int okCount, notCount;
            if (isLowHigh) {
                // We use a special test that counts low, high, and non-ASCII.
                // Whichever form of ASCII has the highest count is the winner, and
                // the loser is counted as non-ASCII.
                DataAnalysis.CountHighLowBytes(mFileData, rng.Low, rng.High, charTest,
                    out int lowAscii, out int highAscii, out int nonAscii);
                if (highAscii > lowAscii) {
                    okCount = highAscii;
                    notCount = nonAscii + lowAscii;
                } else {
                    okCount = lowAscii;
                    notCount = nonAscii + highAscii;
                }
            } else {
                okCount = DataAnalysis.CountCharacterBytes(mFileData,
                    rng.Low, rng.High, charTest);
                notCount = (rng.High - rng.Low + 1) - okCount;
            }

            if (okCount >= 2 && okCount >= notCount * 2) {
                // Looks good
                mixedCharOkCount += okCount;
                mixedCharNotCount += notCount;
            } else {
                // Fail
                radioStringMixed.IsEnabled = false;
                radioStringMixedReverse.IsEnabled = false;
                mixedCharOkCount = mixedCharNotCount = -1;
            }
        }

        // Check for null-terminated strings.  Zero-length strings are allowed, but
        // not counted -- we want to have some actual character data.  Individual
        // ASCII strings need to be entirely high-ASCII or low-ASCII, but not all strings
        // in a region have to be the same.
        if (radioStringNullTerm.IsEnabled) {
            int strCount = DataAnalysis.RecognizeNullTerminatedStrings(mFileData,
                rng.Low, rng.High, charTest, isLowHigh);
            if (strCount > 0) {
                nullTermStringCount += strCount;
            } else {
                radioStringNullTerm.IsEnabled = false;
                nullTermStringCount = -1;
            }
        }

        // Check for strings prefixed with an 8-bit length.
        if (radioStringLen8.IsEnabled) {
            int strCount = DataAnalysis.RecognizeLen8Strings(mFileData, rng.Low, rng.High,
                charTest, isLowHigh);
            if (strCount > 0) {
                len8StringCount += strCount;
            } else {
                radioStringLen8.IsEnabled = false;
                len8StringCount = -1;
            }
        }

        // Check for strings prefixed with a 16-bit length.
        if (radioStringLen16.IsEnabled) {
            int strCount = DataAnalysis.RecognizeLen16Strings(mFileData, rng.Low, rng.High,
                charTest, isLowHigh);
            if (strCount > 0) {
                len16StringCount += strCount;
            } else {
                radioStringLen16.IsEnabled = false;
                len16StringCount = -1;
            }
        }

        // Check for DCI strings.  All strings within the entire range must have the
        // same "polarity", e.g. low ASCII terminated by high ASCII.
        if (radioStringDci.IsEnabled) {
            int strCount = DataAnalysis.RecognizeDciStrings(mFileData, rng.Low, rng.High,
                charTest);
            if (strCount > 0) {
                dciStringCount += strCount;
            } else {
                radioStringDci.IsEnabled = false;
                dciStringCount = -1;
            }
        }
    }

    // Update the dialog with string and character counts, summed across all regions.

    // Placeholder shown in place of a count when an option is unavailable.
    const string UNSUP_STR = "xx";

    // Sets the label on a single-count string radio button from a format resource.
    void SetCountLabel(RadioButton radio, string resourceKey, int stringCount) {
        string labelFmt = (string)FindResource(resourceKey);
        if (stringCount > 0) {
            Debug.Assert(radio.IsEnabled);
            radio.Content = string.Format(labelFmt, stringCount);
        } else {
            Debug.Assert(!radio.IsEnabled);
            radio.Content = string.Format(labelFmt, UNSUP_STR);
        }
    }

    string fmt = (string)FindResource("str_StringMixed");
    string revfmt = (string)FindResource("str_StringMixedReverse");
    if (mixedCharOkCount > 0) {
        Debug.Assert(radioStringMixed.IsEnabled);
        radioStringMixed.Content = string.Format(fmt,
            mixedCharOkCount, mixedCharNotCount);
        radioStringMixedReverse.Content = string.Format(revfmt,
            mixedCharOkCount, mixedCharNotCount);
    } else {
        Debug.Assert(!radioStringMixed.IsEnabled);
        radioStringMixed.Content = string.Format(fmt, UNSUP_STR, UNSUP_STR);
        radioStringMixedReverse.Content = string.Format(revfmt, UNSUP_STR, UNSUP_STR);
    }

    SetCountLabel(radioStringNullTerm, "str_StringNullTerm", nullTermStringCount);
    SetCountLabel(radioStringLen8, "str_StringLen8", len8StringCount);
    SetCountLabel(radioStringLen16, "str_StringLen16", len16StringCount);
    SetCountLabel(radioStringDci, "str_StringDci", dciStringCount);

    // If this invalidated the selected item, reset to Default.  Every string radio
    // button must be covered here, or a checked-but-disabled option could remain
    // selected.
    if ((radioStringMixed.IsChecked == true && !radioStringMixed.IsEnabled) ||
        (radioStringMixedReverse.IsChecked == true && !radioStringMixedReverse.IsEnabled) ||
        (radioStringNullTerm.IsChecked == true && !radioStringNullTerm.IsEnabled) ||
        (radioStringLen8.IsChecked == true && !radioStringLen8.IsEnabled) ||
        (radioStringLen16.IsChecked == true && !radioStringLen16.IsEnabled) ||
        (radioStringDci.IsChecked == true && !radioStringDci.IsEnabled)) {

        Debug.WriteLine("Previous selection invalidated");
        radioDefaultFormat.IsChecked = true;
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Checks whether every word in the selection can be shown as a character value.
/// Using ".DD1 'A'" for 0x41 is obvious, but ".DD2 'A'" for 0x41 0x00 is accepted
/// as well.  16-bit character constants are more likely as intermediate operands,
/// but could be found in data areas.
/// </summary>
/// <param name="wordWidth">Number of bytes per character.</param>
/// <param name="isBigEndian">Word endian-ness.</param>
/// <param name="charTest">Character test delegate.</param>
/// <returns>True if data in all regions can be represented as a character.</returns>
private bool IsCompatibleWithCharSet(int wordWidth, bool isBigEndian,
        CharEncoding.InclusionTest charTest) {
    IEnumerator<TypedRangeSet.TypedRange> iter = mSelection.RangeListIterator;
    while (iter.MoveNext()) {
        TypedRangeSet.TypedRange rng = iter.Current;

        // Each range is expected to hold a whole number of words.
        int rangeLen = rng.High - rng.Low + 1;
        Debug.Assert(rangeLen % wordWidth == 0);

        for (int offset = rng.Low; offset <= rng.High; offset += wordWidth) {
            int val = RawData.GetWord(mFileData, offset, wordWidth, isBigEndian);

            // The value must fit in a single byte and pass the character test.
            bool fitsInByte = (val >= byte.MinValue && val <= byte.MaxValue);
            if (!fitsInByte) {
                return false;
            }
            if (!charTest((byte)val)) {
                return false;
            }
        }
    }
    return true;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Configures the dialog controls based on the provided format descriptor.  If
/// the desired options are unavailable, a suitable default is selected instead.
///
/// Call from the Loaded event.
/// </summary>
/// <param name="dfd">FormatDescriptor to use.  If null, the "default format" radio
///   button is checked and the saved string encoding is applied.</param>
private void SetControlsFromDescriptor(FormatDescriptor dfd) {
    // Start from known defaults; these stay selected unless overridden below.
    radioSimpleDataHex.IsChecked = true;
    radioSymbolPartLow.IsChecked = true;

    // Get the previous mode selected in the combo box.  If the format descriptor
    // doesn't specify a string, we'll use this.
    TextScanMode textMode = (TextScanMode)AppSettings.Global.GetEnum(
        AppSettings.OPED_DEFAULT_STRING_ENCODING, typeof(TextScanMode),
        (int)TextScanMode.LowHighAscii);

    if (dfd == null) {
        radioDefaultFormat.IsChecked = true;
        SetStringEncoding(textMode);
        return;
    }

    if (dfd.IsString) {
        textMode = TextScanModeFromDescriptor(dfd);
    }

    RadioButton preferredFormat;
    switch (dfd.FormatType) {
        case FormatDescriptor.Type.NumericLE:
        case FormatDescriptor.Type.NumericBE:
            preferredFormat = GetPreferredNumericRadio(dfd);
            if (preferredFormat.IsEnabled) {
                SetNumericDisplayFromDescriptor(dfd);
            }
            // Otherwise the preferred format is not enabled; leave Hex/Low checked.
            break;
        case FormatDescriptor.Type.StringGeneric:
            preferredFormat = radioStringMixed;
            break;
        case FormatDescriptor.Type.StringReverse:
            preferredFormat = radioStringMixedReverse;
            break;
        case FormatDescriptor.Type.StringNullTerm:
            preferredFormat = radioStringNullTerm;
            break;
        case FormatDescriptor.Type.StringL8:
            preferredFormat = radioStringLen8;
            break;
        case FormatDescriptor.Type.StringL16:
            preferredFormat = radioStringLen16;
            break;
        case FormatDescriptor.Type.StringDci:
            preferredFormat = radioStringDci;
            break;
        case FormatDescriptor.Type.Dense:
            preferredFormat = radioDenseHex;
            break;
        case FormatDescriptor.Type.Fill:
            preferredFormat = radioFill;
            break;
        default:
            // Should not be here.
            Debug.Assert(false);
            preferredFormat = radioDefaultFormat;
            break;
    }

    if (preferredFormat.IsEnabled) {
        preferredFormat.IsChecked = true;
    } else {
        // Remember that the requested format could not be offered, and fall back
        // to the default format.
        mPreferredFormatUnavailable = true;
        radioDefaultFormat.IsChecked = true;
    }

    SetStringEncoding(textMode);
}

/// <summary>
/// Picks the main-format radio button that corresponds to a numeric descriptor's
/// length and, for 16-bit values, its byte order.
/// </summary>
/// <param name="dfd">Numeric (NumericLE or NumericBE) format descriptor.</param>
/// <returns>The matching radio button, or the "default format" button if the
///   length is not one of 1-4.</returns>
private RadioButton GetPreferredNumericRadio(FormatDescriptor dfd) {
    switch (dfd.Length) {
        case 1:
            return radioSingleBytes;
        case 2:
            return (dfd.FormatType == FormatDescriptor.Type.NumericLE) ?
                radio16BitLittle : radio16BitBig;
        case 3:
            return radio24BitLittle;
        case 4:
            return radio32BitLittle;
        default:
            Debug.Assert(false);
            return radioDefaultFormat;
    }
}

/// <summary>
/// Checks the "display as" radio button (and, for symbols, the symbol part button
/// and label text box) that corresponds to a numeric descriptor's sub-type.
/// </summary>
/// <param name="dfd">Numeric format descriptor.</param>
private void SetNumericDisplayFromDescriptor(FormatDescriptor dfd) {
    switch (dfd.FormatSubType) {
        case FormatDescriptor.SubType.None:
        case FormatDescriptor.SubType.Hex:
            radioSimpleDataHex.IsChecked = true;
            break;
        case FormatDescriptor.SubType.Decimal:
            radioSimpleDataDecimal.IsChecked = true;
            break;
        case FormatDescriptor.SubType.Binary:
            radioSimpleDataBinary.IsChecked = true;
            break;
        case FormatDescriptor.SubType.Ascii:
        case FormatDescriptor.SubType.HighAscii:
            // Low and high ASCII share a single radio button.
            radioSimpleDataAscii.IsChecked = true;
            break;
        case FormatDescriptor.SubType.C64Petscii:
            radioSimpleDataPetscii.IsChecked = true;
            break;
        case FormatDescriptor.SubType.C64Screen:
            radioSimpleDataScreenCode.IsChecked = true;
            break;
        case FormatDescriptor.SubType.Address:
            radioSimpleDataAddress.IsChecked = true;
            break;
        case FormatDescriptor.SubType.Symbol:
            // Assert before touching SymbolRef, so that a descriptor without a
            // symbol trips the assertion rather than failing on the dereference.
            Debug.Assert(dfd.HasSymbol);
            radioSimpleDataSymbolic.IsChecked = true;
            switch (dfd.SymbolRef.ValuePart) {
                case WeakSymbolRef.Part.Low:
                    radioSymbolPartLow.IsChecked = true;
                    break;
                case WeakSymbolRef.Part.High:
                    radioSymbolPartHigh.IsChecked = true;
                    break;
                case WeakSymbolRef.Part.Bank:
                    radioSymbolPartBank.IsChecked = true;
                    break;
                default:
                    Debug.Assert(false);
                    break;
            }
            symbolEntryTextBox.Text = dfd.SymbolRef.Label;
            break;
        default:
            Debug.Assert(false);
            break;
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Maps the character-encoding sub-type of a string format descriptor to the
/// corresponding TextScanMode value.
/// </summary>
/// <param name="dfd">String format descriptor.</param>
/// <returns>Matching scan mode; LowHighAscii if the sub-type is not recognized.</returns>
private TextScanMode TextScanModeFromDescriptor(FormatDescriptor dfd) {
    Debug.Assert(dfd.IsString);

    FormatDescriptor.SubType encoding = dfd.FormatSubType;
    if (encoding == FormatDescriptor.SubType.Ascii ||
            encoding == FormatDescriptor.SubType.HighAscii) {
        return TextScanMode.LowHighAscii;
    }
    if (encoding == FormatDescriptor.SubType.C64Petscii) {
        return TextScanMode.C64Petscii;
    }
    if (encoding == FormatDescriptor.SubType.C64Screen) {
        return TextScanMode.C64ScreenCode;
    }

    // Unexpected sub-type for a string; complain in debug builds and fall back.
    Debug.Assert(false);
    return TextScanMode.LowHighAscii;
}
|
|
|
|
|
|
2019-07-11 20:56:16 +00:00
|
|
|
|
#endregion Setup
|
|
|
|
|
|
|
|
|
|
#region FormatDescriptor creation
|
|
|
|
|
|
2019-07-09 00:02:25 +00:00
|
|
|
|
/// <summary>
/// Builds the set of FormatDescriptors described by the current control
/// configuration and stores it in the Results property.
///
/// The entries in the list are guaranteed to be sorted by start address and not
/// overlap.
///
/// Whatever the controls report is assumed to be correct, e.g. they won't ask us
/// to put a buffer full of zeroes into a DCI string.
/// </summary>
private void CreateDescriptorListFromControls() {
    FormatDescriptor.Type type = FormatDescriptor.Type.Default;
    FormatDescriptor.SubType subType = FormatDescriptor.SubType.None;
    WeakSymbolRef symbolRef = null;
    int chunkLength = -1;

    // Work out the character sub-type and inclusion test from the string
    // encoding combo box selection.
    FormatDescriptor.SubType charSubType;
    CharEncoding.InclusionTest charTest;
    TextScanMode scanMode = ((StringEncodingItem)stringEncodingComboBox.SelectedItem).Mode;
    if (scanMode == TextScanMode.LowAscii) {
        charSubType = FormatDescriptor.SubType.Ascii;
        charTest = CharEncoding.IsExtendedAscii;
    } else if (scanMode == TextScanMode.C64Petscii) {
        charSubType = FormatDescriptor.SubType.C64Petscii;
        charTest = CharEncoding.IsExtendedC64Petscii;
    } else if (scanMode == TextScanMode.C64ScreenCode) {
        charSubType = FormatDescriptor.SubType.C64Screen;
        charTest = CharEncoding.IsExtendedC64ScreenCode;
    } else {
        // LowHighAscii is the only other expected mode; anything else is
        // flagged in debug builds and handled the same way.
        Debug.Assert(scanMode == TextScanMode.LowHighAscii);
        charSubType = FormatDescriptor.SubType.ASCII_GENERIC;
        charTest = CharEncoding.IsExtendedLowOrHighAscii;
    }

    // Decode the "display as" panel, if it's relevant.
    if (!radioSimpleDataHex.IsEnabled) {
        subType = 0;        // set later, or doesn't matter
    } else if (radioSimpleDataHex.IsChecked == true) {
        subType = FormatDescriptor.SubType.Hex;
    } else if (radioSimpleDataDecimal.IsChecked == true) {
        subType = FormatDescriptor.SubType.Decimal;
    } else if (radioSimpleDataBinary.IsChecked == true) {
        subType = FormatDescriptor.SubType.Binary;
    } else if (radioSimpleDataAscii.IsChecked == true) {
        subType = FormatDescriptor.SubType.ASCII_GENERIC;
    } else if (radioSimpleDataPetscii.IsChecked == true) {
        subType = FormatDescriptor.SubType.C64Petscii;
    } else if (radioSimpleDataScreenCode.IsChecked == true) {
        subType = FormatDescriptor.SubType.C64Screen;
    } else if (radioSimpleDataAddress.IsChecked == true) {
        subType = FormatDescriptor.SubType.Address;
    } else if (radioSimpleDataSymbolic.IsChecked == true) {
        WeakSymbolRef.Part part;
        if (radioSymbolPartLow.IsChecked == true) {
            part = WeakSymbolRef.Part.Low;
        } else if (radioSymbolPartHigh.IsChecked == true) {
            part = WeakSymbolRef.Part.High;
        } else if (radioSymbolPartBank.IsChecked == true) {
            part = WeakSymbolRef.Part.Bank;
        } else {
            Debug.Assert(false);
            part = WeakSymbolRef.Part.Low;
        }
        subType = FormatDescriptor.SubType.Symbol;
        symbolRef = new WeakSymbolRef(symbolEntryTextBox.Text, part);
    } else {
        Debug.Assert(false);
    }

    // Decode the main format.
    if (radioDefaultFormat.IsChecked == true) {
        // Default/None; note this would create a multi-byte Default format, which isn't
        // really allowed.  What we actually want to do is remove the explicit formatting
        // from all spanned offsets, so we use a dedicated type for that.
        type = FormatDescriptor.Type.REMOVE;
    } else if (radioSingleBytes.IsChecked == true) {
        type = FormatDescriptor.Type.NumericLE;
        chunkLength = 1;
    } else if (radio16BitLittle.IsChecked == true) {
        type = FormatDescriptor.Type.NumericLE;
        chunkLength = 2;
    } else if (radio16BitBig.IsChecked == true) {
        type = FormatDescriptor.Type.NumericBE;
        chunkLength = 2;
    } else if (radio24BitLittle.IsChecked == true) {
        type = FormatDescriptor.Type.NumericLE;
        chunkLength = 3;
    } else if (radio32BitLittle.IsChecked == true) {
        type = FormatDescriptor.Type.NumericLE;
        chunkLength = 4;
    } else if (radioDenseHex.IsChecked == true) {
        type = FormatDescriptor.Type.Dense;
    } else if (radioFill.IsChecked == true) {
        type = FormatDescriptor.Type.Fill;
    } else if (radioStringMixed.IsChecked == true) {
        type = FormatDescriptor.Type.StringGeneric;
    } else if (radioStringMixedReverse.IsChecked == true) {
        type = FormatDescriptor.Type.StringReverse;
    } else if (radioStringNullTerm.IsChecked == true) {
        type = FormatDescriptor.Type.StringNullTerm;
    } else if (radioStringLen8.IsChecked == true) {
        type = FormatDescriptor.Type.StringL8;
    } else if (radioStringLen16.IsChecked == true) {
        type = FormatDescriptor.Type.StringL16;
    } else if (radioStringDci.IsChecked == true) {
        type = FormatDescriptor.Type.StringDci;
    } else {
        Debug.Assert(false);
        // default/none
    }

    // For every string type, the sub-type is the character encoding rather than
    // whatever the "display as" panel said.
    switch (type) {
        case FormatDescriptor.Type.StringGeneric:
        case FormatDescriptor.Type.StringReverse:
        case FormatDescriptor.Type.StringNullTerm:
        case FormatDescriptor.Type.StringL8:
        case FormatDescriptor.Type.StringL16:
        case FormatDescriptor.Type.StringDci:
            subType = charSubType;
            break;
        default:
            break;
    }

    Results = new SortedList<int, FormatDescriptor>();

    // Generate entries for each selected range, using the creator that matches
    // the chosen type.
    IEnumerator<TypedRangeSet.TypedRange> rangeIter = mSelection.RangeListIterator;
    while (rangeIter.MoveNext()) {
        TypedRangeSet.TypedRange range = rangeIter.Current;

        switch (type) {
            case FormatDescriptor.Type.StringGeneric:
            case FormatDescriptor.Type.StringReverse:
                CreateMixedStringEntries(range.Low, range.High, type, subType, charTest);
                break;
            case FormatDescriptor.Type.StringNullTerm:
                CreateCStringEntries(range.Low, range.High, type, subType);
                break;
            case FormatDescriptor.Type.StringL8:
            case FormatDescriptor.Type.StringL16:
                CreateLengthStringEntries(range.Low, range.High, type, subType);
                break;
            case FormatDescriptor.Type.StringDci:
                CreateDciStringEntries(range.Low, range.High, type, subType);
                break;
            default:
                CreateSimpleEntries(type, subType, chunkLength, symbolRef,
                    range.Low, range.High);
                break;
        }
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Adds FormatDescriptor entries covering the specified range to the Results list.
///
/// The range is either covered by a single entry spanning all of it (for e.g.
/// strings and bulk data), or divided into equal-sized chunks.
/// </summary>
/// <param name="type">Region data type.</param>
/// <param name="subType">Region data sub-type.</param>
/// <param name="chunkLength">Length of a chunk, or -1 for full buffer.</param>
/// <param name="symbolRef">Symbol reference, or null if not applicable.</param>
/// <param name="low">Offset of first byte in range.</param>
/// <param name="high">Offset of last byte in range.</param>
private void CreateSimpleEntries(FormatDescriptor.Type type,
        FormatDescriptor.SubType subType, int chunkLength,
        WeakSymbolRef symbolRef, int low, int high) {

    if (chunkLength == -1) {
        // One chunk that spans the whole range.
        chunkLength = (high - low) + 1;
    }
    // The range must divide evenly into chunks.
    Debug.Assert(((high - low + 1) / chunkLength) * chunkLength == high - low + 1);

    bool isBigEndian = (type == FormatDescriptor.Type.NumericBE);
    bool isGenericAscii = (subType == FormatDescriptor.SubType.ASCII_GENERIC);

    // All chunks have the same type and length, so a single descriptor instance
    // can be shared between them.  (This is safe because FormatDescriptor
    // instances are immutable.)
    //
    // ASCII values for non-string data are the exception: low vs. high ASCII has
    // to be determined from the data itself, so the descriptor may get swapped
    // out as we go.
    FormatDescriptor dfd = (subType == FormatDescriptor.SubType.Symbol)
        ? FormatDescriptor.Create(chunkLength, symbolRef, isBigEndian)
        : FormatDescriptor.Create(chunkLength, type, subType);

    for (int offset = low; offset <= high; offset += chunkLength) {
        if (isGenericAscii) {
            Debug.Assert(dfd.IsNumeric);
            int val = RawData.GetWord(mFileData, offset, dfd.Length, isBigEndian);

            FormatDescriptor.SubType actualSubType;
            if (val > 0x7f) {
                actualSubType = FormatDescriptor.SubType.HighAscii;
            } else {
                actualSubType = FormatDescriptor.SubType.Ascii;
            }

            if (actualSubType != dfd.FormatSubType) {
                // Current descriptor has the wrong flavor of ASCII; replace it.
                dfd = FormatDescriptor.Create(chunkLength, type, actualSubType);
            }
        }

        Results.Add(offset, dfd);
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Scans the bytes in the specified range and adds FormatDescriptor entries to the
/// Results list.  Each run of bytes accepted by the character test is emitted as a
/// string (or as a single character item if the run is one byte long), and every byte
/// rejected by the test is emitted as an individual hex byte.
/// </summary>
/// <remarks>
/// This is the only string create function that accepts a mix of valid and invalid
/// characters.
///
/// When the sub-type is ASCII_GENERIC, a run is also ended wherever the high bit of
/// an accepted byte differs from that of the previous accepted byte, so that every
/// emitted run has a single high-bit polarity.
/// </remarks>
/// <param name="low">Offset of first byte in range.</param>
/// <param name="high">Offset of last byte in range.</param>
/// <param name="type">String type (Generic or Reverse).</param>
/// <param name="subType">String sub-type.</param>
/// <param name="charTest">Character test delegate.</param>
private void CreateMixedStringEntries(int low, int high, FormatDescriptor.Type type,
        FormatDescriptor.SubType subType, CharEncoding.InclusionTest charTest) {
    // Only the generic ASCII sub-type breaks a run when the high bit changes.
    bool splitOnHighBit = (subType == FormatDescriptor.SubType.ASCII_GENERIC);

    int runStart = -1;      // offset of the currently open run, or -1 if none is open
    int runHighBit = 0;     // high bit of the most recently accepted byte

    int cur = low;
    while (cur <= high) {
        byte val = mFileData[cur];
        if (!charTest(val)) {
            // Not a character: flush any open run, then emit this byte by itself.
            if (runStart >= 0) {
                CreateGenericStringOrByte(runStart, cur - runStart, type, subType);
                runStart = -1;
            }
            CreateByteFD(cur, FormatDescriptor.SubType.Hex);
        } else {
            int curHighBit = val & 0x80;
            if (runStart < 0) {
                // Not in a run yet; this byte opens one.
                runStart = cur;
            } else if (splitOnHighBit && curHighBit != runHighBit) {
                // High bit flipped: emit what we have and start a new run here.
                CreateGenericStringOrByte(runStart, cur - runStart, type, subType);
                runStart = cur;
            }
            runHighBit = curHighBit;
        }
        cur++;
    }

    // Close out a run that extends to the end of the range.
    if (runStart >= 0) {
        CreateGenericStringOrByte(runStart, cur - runStart, type, subType);
    }
}
|
|
|
|
|
|
2019-08-16 00:53:12 +00:00
|
|
|
|
/// <summary>
/// Converts the ASCII_GENERIC sub-type to a specific one by examining the high bit
/// of the file byte at the given offset.  Any other sub-type is returned unchanged.
/// </summary>
/// <param name="offset">Offset of the byte to examine.</param>
/// <param name="subType">Requested sub-type.</param>
/// <returns>HighAscii or Ascii if the request was ASCII_GENERIC, depending on whether
///   bit 7 of the byte is set; otherwise the sub-type that was passed in.</returns>
private FormatDescriptor.SubType ResolveAsciiGeneric(int offset,
        FormatDescriptor.SubType subType) {
    if (subType != FormatDescriptor.SubType.ASCII_GENERIC) {
        // Already specific, nothing to resolve.
        return subType;
    }
    bool highBitSet = (mFileData[offset] & 0x80) != 0;
    return highBitSet ?
        FormatDescriptor.SubType.HighAscii : FormatDescriptor.SubType.Ascii;
}
|
|
|
|
|
|
2019-07-09 00:02:25 +00:00
|
|
|
|
/// <summary>
/// Adds a format descriptor for a run of character data to the Results list.  A run
/// that is exactly one byte long is added as a single-byte character item rather than
/// as a one-byte string.
/// </summary>
/// <param name="offset">Offset of first byte.</param>
/// <param name="length">Length of string; must be greater than zero.</param>
/// <param name="type">String type (Generic or Reverse).</param>
/// <param name="subType">String sub-type.  If set to ASCII_GENERIC, it is refined
///   based on the byte at <paramref name="offset"/>.</param>
private void CreateGenericStringOrByte(int offset, int length,
        FormatDescriptor.Type type, FormatDescriptor.SubType subType) {
    Debug.Assert(length > 0);

    FormatDescriptor.SubType resolved = ResolveAsciiGeneric(offset, subType);
    if (length == 1) {
        // Lone character: use a single-byte item with the same encoding the string
        // would have had.
        CreateByteFD(offset, resolved);
        return;
    }

    Results.Add(offset, FormatDescriptor.Create(length, type, resolved));
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Adds a one-byte NumericLE format descriptor with the given sub-type to the
/// Results list.
/// </summary>
/// <param name="offset">File offset; used as the key in Results.</param>
/// <param name="subType">How to format the item.</param>
private void CreateByteFD(int offset, FormatDescriptor.SubType subType) {
    Results.Add(offset,
        FormatDescriptor.Create(1, FormatDescriptor.Type.NumericLE, subType));
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Splits the specified range into null-terminated strings, adding one
/// FormatDescriptor per string to the Results list.  Each entry covers the string's
/// bytes plus its 0x00 terminator.
/// </summary>
/// <param name="low">Offset of first byte in range.</param>
/// <param name="high">Offset of last byte in range.</param>
/// <param name="type">String type to store in each descriptor.</param>
/// <param name="subType">String sub-type.  ASCII_GENERIC is resolved from the first
///   byte of each entry.</param>
private void CreateCStringEntries(int low, int high, FormatDescriptor.Type type,
        FormatDescriptor.SubType subType) {
    int stringStart = low;
    int pos = low;
    while (pos <= high) {
        if (mFileData[pos] == 0x00) {
            // Found a terminator.  Zero-length strings are allowed, in which case
            // the entry is just the terminator byte.
            int length = pos - stringStart + 1;
            FormatDescriptor.SubType resolved =
                ResolveAsciiGeneric(stringStart, subType);
            Results.Add(stringStart, FormatDescriptor.Create(length, type, resolved));
            stringStart = pos + 1;
        }
        pos++;
    }

    // Earlier analysis guaranteed that the last byte in the buffer is 0x00.
    Debug.Assert(stringStart == high + 1);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Splits the specified range into length-prefixed strings, adding one
/// FormatDescriptor per string to the Results list.  Each entry covers the length
/// prefix plus the string's characters.
/// </summary>
/// <remarks>
/// The prefix is two bytes (little-endian) when the type is StringL16, and one byte
/// for any other type.
/// </remarks>
/// <param name="low">Offset of first byte in range.</param>
/// <param name="high">Offset of last byte in range.</param>
/// <param name="type">String type; determines the size of the length prefix.</param>
/// <param name="subType">String sub-type.  ASCII_GENERIC is resolved from the first
///   character of each string.</param>
private void CreateLengthStringEntries(int low, int high, FormatDescriptor.Type type,
        FormatDescriptor.SubType subType) {
    int prefixLen = (type == FormatDescriptor.Type.StringL16) ? 2 : 1;

    int i;
    for (i = low; i <= high;) {
        int charCount = mFileData[i];
        if (prefixLen == 2) {
            charCount |= mFileData[i + 1] << 8;
        }
        // Total entry length includes the prefix.  Zero-length strings are allowed.
        int length = charCount + prefixLen;

        // Resolve a generic ASCII sub-type from the first *character*, not from the
        // length prefix: the prefix's high bit only reflects the string's length.
        // An empty string has no character to examine, so fall back to the prefix
        // byte rather than reading past the end of the string.
        int probeOffset = (charCount > 0) ? i + prefixLen : i;

        FormatDescriptor dfd = FormatDescriptor.Create(length, type,
            ResolveAsciiGeneric(probeOffset, subType));
        Results.Add(i, dfd);
        i += length;
    }

    // The strings are expected to exactly fill the range.
    Debug.Assert(i == high + 1);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Splits the specified range into DCI strings, adding one FormatDescriptor per
/// string to the Results list.  A string ends at each byte whose high bit matches the
/// high bit of the last byte in the range.
/// </summary>
/// <param name="low">Offset of first byte in range.</param>
/// <param name="high">Offset of last byte in range.</param>
/// <param name="type">String type to store in each descriptor.</param>
/// <param name="subType">String sub-type.  ASCII_GENERIC is resolved from the first
///   byte of each string.</param>
private void CreateDciStringEntries(int low, int high, FormatDescriptor.Type type,
        FormatDescriptor.SubType subType) {
    int limit = high + 1;

    // Zero-length strings aren't a thing for DCI.  The analyzer requires that all
    // strings in a region have the same polarity, so the last byte of the range tells
    // us what the terminating high bit looks like.
    int terminatorBit = mFileData[limit - 1] & 0x80;

    int runStart = low;
    for (int pos = low; pos != limit; pos++) {
        if ((mFileData[pos] & 0x80) != terminatorBit) {
            // Still inside the current string.
            continue;
        }

        // This byte ends the string; it is included in the entry.
        int length = pos - runStart + 1;
        FormatDescriptor.SubType resolved = ResolveAsciiGeneric(runStart, subType);
        Results.Add(runStart, FormatDescriptor.Create(length, type, resolved));
        runStart = pos + 1;
    }

    Debug.Assert(runStart == limit);
}
|
2019-07-11 20:56:16 +00:00
|
|
|
|
|
|
|
|
|
#endregion FormatDescriptor creation
|
2019-07-09 00:02:25 +00:00
|
|
|
|
}
|
|
|
|
|
}
|