1
0
mirror of https://github.com/fadden/6502bench.git synced 2024-06-12 08:29:29 +00:00

Progress on Apple IIgs OMF file handling

Wrote segment parser.
This commit is contained in:
Andy McFadden 2020-06-25 17:30:44 -07:00
parent 5026fd6569
commit b77d9ba4c8
5 changed files with 534 additions and 18 deletions

View File

@ -86,6 +86,7 @@
</Compile>
<Compile Include="Tools\ApplesoftToHtml.cs" />
<Compile Include="Tools\Omf\OmfFile.cs" />
<Compile Include="Tools\Omf\OmfSegment.cs" />
<Compile Include="Tools\Omf\WpfGui\OmfViewer.xaml.cs">
<DependentUpon>OmfViewer.xaml</DependentUpon>
</Compile>

View File

@ -15,18 +15,28 @@
*/
using System;
using System.Collections.Generic;
using System.Diagnostics;
namespace SourceGen.Tools.Omf {
/// <summary>
/// Apple IIgs OMF file.
/// </summary>
/// <remarks>
/// OMF files are a series of segments. There is no file header or identifying information.
/// In some cases the length is expected to be a multiple of 512 bytes, in others it isn't.
///
/// There's no structural limitation on mixing and matching segments, whether different
/// versions or different types. The file format provides a structure in which various
/// things may be stored, but does not provide a way to tell an observer what is contained
/// within (the ProDOS file type is supposed to do that).
///
/// References:
/// - (OMF "v0" is documented in an Orca/M manual?)
/// - "Apple IIgs Programmer's Workshop Reference". Chapter 7, page 228, describes
/// OMF v1.0 and v2.0.
/// - "Apple IIgs GS/OS Reference, for GS/OS System Software Version 5.0 and later".
/// Appendix F describes OMF v2.1, and Chapter 8 has some useful information about
/// how the loader works.
/// how the loader works (e.g. page 205).
/// - "Undocumented Secrets of the Apple IIGS System Loader" by Neil Parker,
/// http://nparker.llx.com/a2/loader.html . Among other things it documents ExpressLoad
/// segments, something Apple apparently never did.
@ -37,8 +47,8 @@ namespace SourceGen.Tools.Omf {
/// - https://github.com/fadden/ciderpress/blob/master/reformat/Disasm.cpp
/// </remarks>
public class OmfFile {
public const int MIN_FILE_SIZE = 37; // can't be smaller than v0 segment hdr
public const int MAX_FILE_SIZE = (1 << 24) - 1; // cap it at 16MB
public const int MIN_FILE_SIZE = OmfSegment.MIN_HEADER_V0;
public const int MAX_FILE_SIZE = (1 << 24) - 1; // cap at 16MB
// TODO:
// - has an overall file type (load, object, RTL)
@ -54,5 +64,84 @@ namespace SourceGen.Tools.Omf {
// generate a full relocation dictionary for load files (can't do this until we
// know the overall file type, which we can't know until all segments have been
// processed a bit)
private byte[] mFileData;
/// <summary>
/// Overall file contents, determined by analysis.
/// </summary>
public enum FileKind {
    // Not determined; this is the value the constructor assigns.
    Unknown = 0,

    // Loadable files.
    Load = 1,

    // Output of assembler/compiler, before linking.
    Object = 2,

    // Static code library.
    Library = 3,

    // Dynamic shared library.
    RunTimeLibrary = 4,

    // Not OMF, or not IIgs OMF.
    Foreign = 5
}
public FileKind OmfFileKind { get; private set; }
private bool mIsDamaged;
private string mDamageMsg = string.Empty;
private List<OmfSegment> mSegmentList = new List<OmfSegment>();
// Segments collected by analysis, in the order they were added.  This hands out the
// internal list itself, not a copy.
public List<OmfSegment> SegmentList => mSegmentList;
/// <summary>
/// Creates an object for the given file contents.  No parsing happens here; call
/// Analyze() to populate the segment list.
/// </summary>
/// <param name="fileData">Contents of the file to analyze.  The array is retained,
///   not copied.  Its length is expected to be in the range
///   [MIN_FILE_SIZE, MAX_FILE_SIZE]; this is only asserted.</param>
public OmfFile(byte[] fileData) {
    // Size limits are the caller's responsibility; these are sanity checks only.
    Debug.Assert(fileData.Length >= MIN_FILE_SIZE);
    Debug.Assert(fileData.Length <= MAX_FILE_SIZE);

    OmfFileKind = FileKind.Unknown;
    mFileData = fileData;
}
/// <summary>
/// Analyzes the file contents, populating the segment list.
/// </summary>
/// <remarks>
/// The file is first parsed as a non-library file.  If that pass reports a library
/// dictionary segment, or fails, a second pass is made in library mode.  Each pass
/// starts from a clean slate so that segments and status from an abandoned pass cannot
/// leak into the final result.  If both passes fail, the pass that got further (more
/// segments parsed) is the one whose results are kept.
/// </remarks>
public void Analyze() {
    OmfSegment.ParseResult firstResult = DoAnalyze(false);
    if (firstResult != OmfSegment.ParseResult.IsLibrary &&
            firstResult != OmfSegment.ParseResult.Failure) {
        return;
    }

    // Save the outcome of the first pass in case the library-mode pass does worse.
    List<OmfSegment> firstSegments = new List<OmfSegment>(mSegmentList);
    FileKind firstKind = OmfFileKind;
    bool firstDamaged = mIsDamaged;
    string firstDamageMsg = mDamageMsg;

    // Reset all per-pass state.  The same list instance is kept because it is what
    // the SegmentList property hands out.
    mSegmentList.Clear();
    OmfFileKind = FileKind.Unknown;
    mIsDamaged = false;
    mDamageMsg = string.Empty;

    OmfSegment.ParseResult libResult = DoAnalyze(true);

    // If the first pass merely ran into trouble part-way through (as opposed to
    // discovering that the file is a library), and library mode recovered fewer
    // segments, the first pass was the better interpretation.
    if (firstResult == OmfSegment.ParseResult.Failure &&
            libResult == OmfSegment.ParseResult.Failure &&
            firstSegments.Count > mSegmentList.Count) {
        mSegmentList.Clear();
        mSegmentList.AddRange(firstSegments);
        OmfFileKind = firstKind;
        mIsDamaged = firstDamaged;
        mDamageMsg = firstDamageMsg;
    }
}
/// <summary>
/// Makes one pass over the file, parsing consecutive segments starting at offset 0
/// and appending each to mSegmentList.
/// </summary>
/// <param name="parseAsLibrary">Passed through to OmfSegment.ParseSegment to select
///   library-mode parsing.</param>
/// <returns>Success if the whole file was consumed.  Failure if a segment could not be
///   parsed: the file is marked Foreign when the very first segment fails, otherwise
///   it is flagged as damaged and the segments parsed so far are kept.  IsLibrary if
///   the parser asked for a restart in library mode.</returns>
private OmfSegment.ParseResult DoAnalyze(bool parseAsLibrary) {
    int offset = 0;
    int len = mFileData.Length;

    while (len > 0) {
        OmfSegment.ParseResult result =
            OmfSegment.ParseSegment(mFileData, offset, parseAsLibrary, out OmfSegment seg);
        if (result == OmfSegment.ParseResult.Failure) {
            // Parsing failed; reject file or stop early.  The first segment is the
            // one at offset 0; a failure anywhere later means we had at least one
            // good segment, so the file is damaged rather than foreign.
            if (offset == 0) {
                OmfFileKind = FileKind.Foreign;
            } else {
                mIsDamaged = true;
                mDamageMsg = string.Format("File may be damaged; ignoring last {0} bytes",
                    mFileData.Length - offset);
            }
            return result;
        } else if (result == OmfSegment.ParseResult.IsLibrary) {
            // Need to start over in library mode.
            Debug.WriteLine("Restarting in library mode");
            return result;
        }

        // A zero-length segment would keep this loop from advancing.
        Debug.Assert(seg.FileLength > 0);

        mSegmentList.Add(seg);
        offset += seg.FileLength;
        len -= seg.FileLength;
        Debug.Assert(len >= 0);
    }

    Debug.WriteLine("Num segments = " + mSegmentList.Count);
    return OmfSegment.ParseResult.Success;
}
}
}

View File

@ -0,0 +1,392 @@
/*
* Copyright 2020 faddenSoft
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Input;
using CommonUtil;
namespace SourceGen.Tools.Omf {
/// <summary>
/// Apple IIgs OMF file segment.
/// </summary>
/// <remarks>
/// Three versions of OMF were used for Apple IIgs binaries: v1.0, v2.0, and v2.1. (There's
/// also a "v0" used for older Orca/M 8-bit products.) The Apple IIgs Programmer's
/// Workshop Reference says:
///
/// "This section describes Version 2.0 of the Apple IIGS object module
/// format (OMF). The System Loader supports files written in either Version 2.0 or
/// Version 1.0 of the OMF. The APW Linker, however, creates load files that
/// conform to Version 1.0 of the OMF. Notes in this section describe the differences
/// between Version 1.0 and Version 2.0 of the OMF. The Compact utility program,
/// described in Chapter 3, converts load files from Version 1.0 to Version 2.0."
///
/// Most IIgs binaries are v1.0 or v2.0.
///
/// You'd hope that parsing a segment would be unambiguous, but that is not the case.
/// From the same reference:
///
/// "In Version 1.0, [the first] field is described as follows. For object files
/// and load files, BLKCNT is a 4-byte field containing the number of blocks in the file
/// that the segment requires. Each block is 512 bytes. The segment header is part of
/// the first block of the segment. Segments in an object file or load file start on block
/// boundaries. For library files (ProDOS 16 file type $B2), this field is BYTECNT,
/// indicating the number of bytes in the segment. Library-file segments are not
/// aligned to block boundaries."
///
/// This choice means it's impossible to unambiguously parse a v1 OMF file without knowing
/// its ProDOS file type, which we don't have access to. In most cases we can make a
/// reasonable guess.
///
/// Documentation bugs:
/// - GS/OS ref: table F-2 says "blockCount" where it should say "SEGNAME", and shows the
/// offset of tempOrg as $2a (should be $2c).
/// - GS/OS ref: appendix F refers to a "REVISION" field, which does not seem to exist.
/// </remarks>
public class OmfSegment {
// v0.0: Original Orca/M OMF format. 0x24 bytes followed by variable-length SEGNAME.
public const int MIN_HEADER_V0 = 0x24 + 1;
// v1.0: Initial IIgs OMF format. Adds LCBANK, SEGNUM, ENTRY, DISPNAME, DISPDATA, and
// LOADNAME. Ambiguates BLKCNT/BYTECNT.
public const int MIN_HEADER_V1 = MIN_HEADER_V0 + 8 + LOAD_NAME_LEN;
// v2.0: Updated IIgs OMF format. Removes LCBANK, redefines KIND, and embraces BYTECNT.
public const int MIN_HEADER_V2 = MIN_HEADER_V1 + 4;
// v2.1: adds tempORG and a couple of attribute flags. No "min" constant needed.
// Length of LOADNAME field.
private const int LOAD_NAME_LEN = 10;
// Holder for one name / value / note triple.  This is the element type of the
// RawValues list.
// NOTE(review): nothing in the visible code creates instances of this yet.
public class NameValueNote {
// Name of the item.
public string Name { get; set; }
// Value of the item; typed as object, so any type may be stored.
public object Value { get; set; }
// Free-form note associated with the item.
public string Note { get; set; }
}
/// <summary>
/// Values pulled from file header. Useful for display.
/// </summary>
List<NameValueNote> RawValues = new List<NameValueNote>();
public enum SegmentVersion { v0_0, v1_0, v2_0, v2_1 }
// Segment kind.  ParseSegment takes this from the low five bits of the KIND field
// (a byte at +$0c for v0.0/v1.0, a 16-bit word at +$14 for v2.x) and rejects the
// segment if the value is not one of those defined here.
public enum SegmentKind {
Code = 0x00,
Data = 0x01,
JumpTable = 0x02,
PathName = 0x04,
LibraryDict = 0x08, // finding one of these triggers a re-parse in library mode
Init = 0x10,
AbsoluteBank = 0x11, // v1.0 only; became a flag
DpStack = 0x12
}
/// <summary>
/// Segment attribute flags, included in the Kind field.
/// </summary>
/// <remarks>
/// The values match the bit positions in the 16-bit v2.x KIND word; ParseSegment
/// simply masks that word with $ff00.  For v0.0/v1.0 segments, which have a one-byte
/// KIND, only the top three flags can be set: they are translated from bits
/// $20/$40/$80 of that byte.
/// </remarks>
[Flags]
public enum SegmentAttribute {
BankRel = 0x0100, // v2.1
Skip = 0x0200, // v2.1
Reloadable = 0x0400, // v2.0
AbsoluteBank = 0x0800, // v2.0
NoSpecial = 0x1000, // v2.0
PositionIndep = 0x2000, // v0.0/v1.0: KIND byte bit $20
Private = 0x4000, // v0.0/v1.0: KIND byte bit $40
Dynamic = 0x8000 // v0.0/v1.0: KIND byte bit $80
}
//
// Header fields.
//
// These are filled in by ParseSegment.  Offsets given below are relative to the
// start of the segment.
//
// Length of the segment within the file, in bytes.
public int FileLength { get; private set; } // from BLKCNT or BYTECNT
// 4-byte value at +$04.
public int ResSpc { get; private set; }
// 4-byte value at +$08.
public int Length { get; private set; }
// NOTE(review): not assigned anywhere in the visible code.
public int Type { get; private set; }
// Byte at +$0d.  Zero means SEGNAME is preceded by a length byte; nonzero is the
// width of a fixed-width SEGNAME.
public int LabLen { get; private set; }
// From the byte at +$0f; a v2.0 segment is promoted to v2.1 if DISPNAME > $2c.
public SegmentVersion Version { get; private set; }
// 4-byte value at +$10.  Segments with a value > $10000 are rejected.
public int BankSize { get; private set; }
// Low five bits of the KIND field.
public SegmentKind Kind { get; private set; }
// Attribute flags from the KIND field.
public SegmentAttribute Attrs { get; private set; }
// 4-byte value at +$18.
public int Org { get; private set; }
// 4-byte value at +$1c.  Segments with a value > $10000 are rejected.
public int Align { get; private set; }
// Byte at +$21; not read for v0.0 segments.
public int LcBank { get; private set; } // v1.0 only
// 2-byte value at +$22; not read for v0.0 segments.
public int SegNum { get; private set; }
// 4-byte value at +$24; not read for v0.0 segments.
public int Entry { get; private set; }
// 4-byte value at +$2c.
public int TempOrg { get; private set; } // v2.1 only
// Fixed-width (10 byte) name found at DISPNAME; empty for v0.0 segments.
public string LoadName { get; private set; } // unused in load segments
// Segment name; follows LOADNAME (or sits at DISPNAME for v0.0 segments).
public string SegName { get; private set; }
// According to GS/OS ref, an OMF file is considered "foreign" unless:
// - the NUMSEX field is 0
// - the NUMLEN field is 4
// - the BANKSIZE field is <= $10000
// - the ALIGN field is <= $10000
//
// So we don't need to store NUMLEN or NUMSEX. According to the GS/OS ref,
// "The BANKSIZE and align restrictions are enforced by the linker, and violations
// of them are unlikely in a load file."
// Private constructor: instances are only created by ParseSegment.
private OmfSegment() { }
// Result of a ParseSegment call.
public enum ParseResult {
// Default value; never returned by ParseSegment.
Unknown = 0,
// Segment header was parsed and passed all validity checks.
Success,
// Data at the given offset does not look like a valid segment.
Failure,
// A library dictionary segment was found while not parsing as a library; the
// caller should start over with parseAsLibrary=true.
IsLibrary
}
/// <summary>
/// Parses and validates the header of the OMF segment that starts at the specified
/// offset.
/// </summary>
/// <param name="data">File contents.</param>
/// <param name="offset">Offset of the first byte of the segment within data.  Must
///   be less than data.Length.</param>
/// <param name="parseAsLibrary">Affects v1.0 segments only: if true, the first header
///   field is taken as a byte count (BYTECNT); if false, as a count of 512-byte
///   blocks (BLKCNT).</param>
/// <param name="segResult">On Success, the new segment object; otherwise null.</param>
/// <returns>Success if the header was parsed and passed all checks; IsLibrary if a
///   library dictionary segment was found while parseAsLibrary was false; Failure
///   otherwise.</returns>
public static ParseResult ParseSegment(byte[] data, int offset, bool parseAsLibrary,
        out OmfSegment segResult) {
    segResult = null;

    Debug.Assert(offset < data.Length);
    if (data.Length - offset < MIN_HEADER_V0) {
        // Definitely too small.
        return ParseResult.Failure;
    }

    //Debug.WriteLine("PARSE offset=" + offset);

    OmfSegment newSeg = new OmfSegment();

    // Start with the version number.  The meaning of everything else depends on this.
    int minLen, expectedDispName;
    switch (data[offset + 0x0f]) {
        case 0:
            newSeg.Version = SegmentVersion.v0_0;
            minLen = MIN_HEADER_V0;
            expectedDispName = 0x24;
            break;
        case 1:
            newSeg.Version = SegmentVersion.v1_0;
            minLen = MIN_HEADER_V1;
            expectedDispName = 0x2c;
            break;
        case 2:
            newSeg.Version = SegmentVersion.v2_0;
            minLen = MIN_HEADER_V2;
            expectedDispName = 0x2c;
            break;
        default:
            // invalid version, this is probably not OMF
            return ParseResult.Failure;
    }
    if (data.Length - offset < minLen) {
        // Too small for this version of the header.
        return ParseResult.Failure;
    }

    // Fixed-position fields common to all versions.
    // NOTE(review): the final GetWord argument is presumably "is big-endian" --
    // confirm against CommonUtil.RawData.
    int blkByteCnt = RawData.GetWord(data, offset + 0x00, 4, false);
    newSeg.ResSpc = RawData.GetWord(data, offset + 0x04, 4, false);
    newSeg.Length = RawData.GetWord(data, offset + 0x08, 4, false);
    newSeg.LabLen = data[offset + 0x0d];
    int numLen = data[offset + 0x0e];
    newSeg.BankSize = RawData.GetWord(data, offset + 0x10, 4, false);
    newSeg.Org = RawData.GetWord(data, offset + 0x18, 4, false);
    newSeg.Align = RawData.GetWord(data, offset + 0x1c, 4, false);
    int numSex = data[offset + 0x20];

    int dispName, dispData;
    if (newSeg.Version == SegmentVersion.v0_0) {
        // v0 has no DISPNAME/DISPDATA fields; the name immediately follows the
        // fixed part of the header.
        dispName = 0x24;
        if (newSeg.LabLen == 0) {
            dispData = dispName + data[offset + dispName];
        } else {
            dispData = dispName + LOAD_NAME_LEN;
        }
    } else {
        newSeg.LcBank = data[offset + 0x21];
        newSeg.SegNum = RawData.GetWord(data, offset + 0x22, 2, false);
        newSeg.Entry = RawData.GetWord(data, offset + 0x24, 4, false);
        dispName = RawData.GetWord(data, offset + 0x28, 2, false);
        dispData = RawData.GetWord(data, offset + 0x2a, 2, false);
    }

    // The only way to detect a v2.1 segment is by checking DISPNAME.
    if (newSeg.Version == SegmentVersion.v2_0 && dispName > 0x2c) {
        newSeg.Version = SegmentVersion.v2_1;
        expectedDispName += 4;

        if (data.Length - offset < minLen + 4) {
            return ParseResult.Failure;
        }
        newSeg.TempOrg = RawData.GetWord(data, offset + 0x2c, 4, false);
    }

    // Extract Kind and its attributes.
    if (newSeg.Version <= SegmentVersion.v1_0) {
        // One-byte KIND: low 5 bits are the kind, top 3 bits are attribute flags.
        int kindByte = data[offset + 0x0c];
        if (!Enum.IsDefined(typeof(SegmentKind), kindByte & 0x1f)) {
            // Example: Moria GS has a kind of $1F for its GLOBALS segment.
            Debug.WriteLine("Invalid segment kind $" + kindByte.ToString("x2"));
            return ParseResult.Failure;
        }
        newSeg.Kind = (SegmentKind)(kindByte & 0x1f);

        int kindAttrs = 0;
        if ((kindByte & 0x20) != 0) {
            kindAttrs |= (int)SegmentAttribute.PositionIndep;
        }
        if ((kindByte & 0x40) != 0) {
            kindAttrs |= (int)SegmentAttribute.Private;
        }
        if ((kindByte & 0x80) != 0) {
            kindAttrs |= (int)SegmentAttribute.Dynamic;
        }
        newSeg.Attrs = (SegmentAttribute)kindAttrs;
    } else {
        // Yank all the attribute bits out at once.  Don't worry about v2.0 vs. v2.1.
        int kindWord = RawData.GetWord(data, offset + 0x14, 2, false);
        if (!Enum.IsDefined(typeof(SegmentKind), kindWord & 0x001f)) {
            Debug.WriteLine("Invalid segment kind $" + kindWord.ToString("x4"));
            return ParseResult.Failure;
        }
        newSeg.Kind = (SegmentKind)(kindWord & 0x001f);
        newSeg.Attrs = (SegmentAttribute)(kindWord & 0xff00);
    }

    // If we found a library dictionary segment, and we're not currently handling the
    // file as a library, reject this and try again.
    if (newSeg.Kind == SegmentKind.LibraryDict && !parseAsLibrary) {
        return ParseResult.IsLibrary;
    }

    // We've got the basic pieces.  Handle the block-vs-byte debacle.  The length is
    // computed in 64 bits so that a garbage block count can't wrap around to a
    // small, plausible-looking value when multiplied by 512.
    long fileLen;
    if (newSeg.Version == SegmentVersion.v0_0) {
        // Always block count.
        fileLen = (long)blkByteCnt * 512;
    } else if (newSeg.Version >= SegmentVersion.v2_0) {
        // Always byte count.
        fileLen = blkByteCnt;
    } else /*v1.0*/ {
        // Only Library files should treat the field as bytes.  We can eliminate Load
        // files by checking for a nonzero SegNum field, but there's no reliable way
        // to tell the difference between Object and Library while looking at a segment
        // in isolation.
        //
        // I have found a couple of examples (e.g. BRIDGE.S16 in Davex v1.23, SYSTEM:START
        // on an old Paintworks GS disk) where the file's length is shy of a multiple
        // of 512, so we ought to handle that.
        if (parseAsLibrary) {
            fileLen = blkByteCnt;
        } else {
            fileLen = (long)blkByteCnt * 512;
        }
    }

    // Perform validity checks.  If any of these fail, we're probably reading something
    // that isn't OMF (or, if this isn't the first segment, we might have gone off the
    // rails at some point).
    if (numLen != 4 || numSex != 0) {
        Debug.WriteLine("Invalid NUMLEN (" + numLen + ") or NUMSEX (" + numSex + ")");
        return ParseResult.Failure;
    }
    if (fileLen <= 0) {
        // A segment must occupy at least its own header.
        Debug.WriteLine("Invalid segment length " + fileLen);
        return ParseResult.Failure;
    }
    if (fileLen > data.Length - offset) {
        // Segment is longer than the file.  (This can happen easily in a static lib.)
        Debug.WriteLine("Segment exceeds EOF: offset=" + offset + " len=" + data.Length +
            " segLen=" + fileLen);
        return ParseResult.Failure;
    }

    // Known to fit in an int now, and offset + segLen <= data.Length.
    int segLen = (int)fileLen;
    newSeg.FileLength = segLen;

    if (dispName < expectedDispName || dispName > (segLen - LOAD_NAME_LEN)) {
        Debug.WriteLine("Invalid DISPNAME " + dispName + " segLen=" + segLen);
        return ParseResult.Failure;
    }
    if (dispData < expectedDispName + LOAD_NAME_LEN || dispData > (segLen - 1)) {
        Debug.WriteLine("Invalid DISPDATA " + dispData + " segLen=" + segLen);
        return ParseResult.Failure;
    }
    if (newSeg.BankSize > 0x00010000) {
        Debug.WriteLine("Invalid BANKSIZE $" + newSeg.BankSize.ToString("x"));
        return ParseResult.Failure;
    }
    if (newSeg.Align > 0x00010000) {
        Debug.WriteLine("Invalid ALIGN $" + newSeg.Align.ToString("x"));
        return ParseResult.Failure;
    }

    if (newSeg.BankSize != 0x00010000 && newSeg.BankSize != 0) {
        // This is fine, just a little weird.
        Debug.WriteLine("Unusual BANKSIZE $" + newSeg.BankSize.ToString("x6"));
    }
    if (newSeg.Align != 0 && newSeg.Align != 0x0100 && newSeg.Align != 0x00010000) {
        // Unexpected; the loader will round up.
        Debug.WriteLine("Unusual ALIGN $" + newSeg.Align.ToString("x6"));
    }
    if (newSeg.Entry != 0 && newSeg.Entry >= newSeg.Length) {
        // This is invalid, but if we got this far we might as well keep going.
        Debug.WriteLine("Invalid ENTRY $" + newSeg.Entry.ToString("x6"));
    }

    // Extract LOADNAME.  Fixed-width field, padded with spaces.  Except for the
    // times when it's filled with zeroes instead.
    string loadName = string.Empty;
    int segNameStart = dispName;
    if (newSeg.Version != SegmentVersion.v0_0) {
        loadName = ExtractString(data, offset + dispName, LOAD_NAME_LEN);
        segNameStart += LOAD_NAME_LEN;
    }

    // Extract SEGNAME.  May be fixed- or variable-width.
    string segName;
    if (newSeg.LabLen == 0) {
        // string preceded by length byte
        //
        // The DISPNAME check above allows LOADNAME to end exactly at the end of the
        // segment, in which case there's no room for the length byte.  Check before
        // reading so we don't index past the segment (or the end of the file).
        if (segNameStart >= segLen) {
            Debug.WriteLine("Var-width SEGNAME starts past end of segment (start=" +
                segNameStart + ")");
            return ParseResult.Failure;
        }
        int segNameLen = data[offset + segNameStart];
        if (segNameStart + 1 + segNameLen > segLen) {
            Debug.WriteLine("Var-width SEGNAME ran off end of segment (len=" +
                segNameLen + ")");
            return ParseResult.Failure;
        }
        segName = Encoding.ASCII.GetString(data, offset + segNameStart + 1, segNameLen);
    } else {
        // fixed-width string
        if (segNameStart + newSeg.LabLen > segLen) {
            Debug.WriteLine("Fixed-width SEGNAME ran off end of segment (len=" +
                newSeg.LabLen + ")");
            return ParseResult.Failure;
        }
        segName = ExtractString(data, offset + segNameStart, newSeg.LabLen);
    }

    Debug.WriteLine("LOADNAME='" + loadName + "' SEGNAME='" + segName + "'");

    newSeg.LoadName = loadName;
    newSeg.SegName = segName;

    segResult = newSeg;
    return ParseResult.Success;
}
// Converts a fixed-width field to a string.  Bytes are copied one-for-one as
// characters, beginning at "offset", until "len" bytes have been consumed or a
// zero byte is found.  The zero byte is not included in the result.
private static string ExtractString(byte[] data, int offset, int len) {
    StringBuilder text = new StringBuilder();
    int end = offset + len;
    int pos = offset;
    while (pos < end) {
        char ch = (char)data[pos++];
        if (ch == '\0') {
            // Field is zero-terminated / zero-filled; stop here.
            break;
        }
        text.Append(ch);
    }
    return text.ToString();
}
}
}

View File

@ -61,22 +61,22 @@ limitations under the License.
<DataGrid.Resources>
<!-- make the no-focus color the same as the in-focus color -->
<SolidColorBrush x:Key="{x:Static SystemColors.InactiveSelectionHighlightBrushKey}"
Color="{x:Static SystemColors.HighlightColor}"/>
Color="{x:Static SystemColors.HighlightColor}"/>
<SolidColorBrush x:Key="{x:Static SystemColors.InactiveSelectionHighlightTextBrushKey}"
Color="{x:Static SystemColors.HighlightTextColor}"/>
Color="{x:Static SystemColors.HighlightTextColor}"/>
</DataGrid.Resources>
<DataGrid.Columns>
<DataGridTextColumn Header="Num" Width="50" Binding="{Binding SegNum}"/>
<DataGridTextColumn Header="Type" Width="72" Binding="{Binding Value}"/>
<DataGridTextColumn Header="LoadName" Width="100" Binding="{Binding Type}"/>
<DataGridTextColumn Header="SegName" Width="100" Binding="{Binding Width}"/>
<DataGridTextColumn Header="File Size" Width="100" Binding="{Binding Comment}"/>
<DataGridTextColumn Header="Mem Size Size" Width="100" Binding="{Binding Comment}"/>
<DataGridTextColumn Header="SEGNUM" Width="50" Binding="{Binding SegNum}"/>
<DataGridTextColumn Header="KIND" Width="80" Binding="{Binding Kind}"/>
<DataGridTextColumn Header="LOADNAME" Width="100" Binding="{Binding LoadName}"/>
<DataGridTextColumn Header="SEGNAME" Width="100" Binding="{Binding SegName}"/>
<DataGridTextColumn Header="LENGTH" Width="80" Binding="{Binding MemLength}"/>
<DataGridTextColumn Header="File Length" Width="100" Binding="{Binding FileLength}"/>
</DataGrid.Columns>
</DataGrid>
<TextBlock Grid.Row="3" Text="File notes:" Margin="0,8,0,0"/>
<TextBox Grid.Row="4" Margin="0,4,0,0" Height="50"
<TextBox Grid.Row="4" Margin="0,4,0,0" Height="60"
Text="Test&#x0d;stuff1&#x0d;stuff2&#x0d;stuff3"
IsReadOnly="True" VerticalScrollBarVisibility="Auto">
</TextBox>

View File

@ -38,11 +38,41 @@ namespace SourceGen.Tools.Omf.WpfGui {
}
public class SegmentListItem {
public int SegNum { get; private set; }
private OmfSegment mOmfSeg;
// TODO: take OMFSegment obj
public SegmentListItem(int segNum) {
SegNum = segNum;
// Read-only views of the wrapped segment.  The names here are the binding paths
// used by the segment list DataGrid columns.

// Segment number.
public int SegNum => mOmfSeg.SegNum;

// Segment kind, as the enum member's name.
public string Kind => mOmfSeg.Kind.ToString();

// Load name.
public string LoadName => mOmfSeg.LoadName;

// Segment name.
public string SegName => mOmfSeg.SegName;

// Value of the segment's Length property.
public int MemLength => mOmfSeg.Length;

// Value of the segment's FileLength property.
public int FileLength => mOmfSeg.FileLength;

// Wraps the given segment; the reference is retained.
public SegmentListItem(OmfSegment omfSeg) {
    mOmfSeg = omfSeg;
}
}
@ -57,8 +87,12 @@ namespace SourceGen.Tools.Omf.WpfGui {
mPathName = pathName;
mFileData = data;
SegmentListItems.Add(new SegmentListItem(123));
SegmentListItems.Add(new SegmentListItem(456));
OmfFile omfFile = new OmfFile(data);
omfFile.Analyze();
foreach (OmfSegment omfSeg in omfFile.SegmentList) {
SegmentListItems.Add(new SegmentListItem(omfSeg));
}
}
private void SegmentList_MouseDoubleClick(object sender, MouseButtonEventArgs e) {