Add "uninitialized data" format type

This allows regions that hold variable storage to be marked as data
that is initialized by the program before it is used.  Previously
the choices were to treat it as bulk data (initialized) or junk
(totally unused), neither of which is correct.

This is functionally equivalent to "junk" as far as source code
generation is concerned (though it doesn't have to be).

For the code/data/junk counter, uninitialized data is counted as
junk, because it technically does not need to be part of the binary.
This commit is contained in:
Andy McFadden 2021-10-13 14:48:05 -07:00
parent 09eba228dd
commit cb114be0f6
25 changed files with 220 additions and 25 deletions

View File

@ -448,7 +448,9 @@ namespace PluginCommon {
StringL16,
StringDci,
Dense,
Fill
Fill,
Uninit,
Junk
}
/// <summary>
@ -473,5 +475,7 @@ namespace PluginCommon {
HighAscii,
C64Petscii,
C64Screen
// skipping alignment sub-types for now
}
}

View File

@ -136,6 +136,7 @@ namespace SourceGen.AsmGen {
//DefineBigData4
{ "Fill", "!fill" },
{ "Dense", "!hex" },
{ "Uninit", "!skip" },
//Junk
{ "Align", "!align" },
{ "StrGeneric", "!text" }, // can use !xor for high ASCII
@ -458,16 +459,19 @@ namespace SourceGen.AsmGen {
opcodeStr = operandStr = null;
OutputDenseHex(offset, length, labelStr, commentStr);
break;
case FormatDescriptor.Type.Uninit:
case FormatDescriptor.Type.Junk:
bool canAlign = (dfd.FormatType == FormatDescriptor.Type.Junk);
int fillVal = Helper.CheckRangeHoldsSingleValue(data, offset, length);
if (fillVal >= 0 && GenCommon.CheckJunkAlign(offset, dfd, Project.AddrMap)) {
if (canAlign && fillVal >= 0 &&
GenCommon.CheckJunkAlign(offset, dfd, Project.AddrMap)) {
// !align ANDVALUE, EQUALVALUE [, FILLVALUE]
opcodeStr = sDataOpNames.Align;
int alignVal = 1 << FormatDescriptor.AlignmentToPower(dfd.FormatSubType);
operandStr = (alignVal - 1).ToString() +
",0," + formatter.FormatHexValue(fillVal, 2);
} else if (fillVal >= 0) {
// treat same as Fill
} else if (fillVal >= 0 && (length > 1 || fillVal == 0x00)) {
// If multi-byte, or single byte and zero, treat same as Fill.
opcodeStr = sDataOpNames.Fill;
operandStr = length + "," + formatter.FormatHexValue(fillVal, 2);
} else {

View File

@ -136,6 +136,7 @@ namespace SourceGen.AsmGen {
//DefineBigData4
{ "Fill", ".res" },
{ "Dense", ".byte" }, // really just comma-separated bytes
{ "Uninit", ".res" },
//Junk
{ "StrGeneric", ".byte" },
//StrReverse
@ -447,13 +448,14 @@ namespace SourceGen.AsmGen {
opcodeStr = operandStr = null;
OutputDenseHex(offset, length, labelStr, commentStr);
break;
case FormatDescriptor.Type.Uninit:
case FormatDescriptor.Type.Junk:
// The ca65 .align directive has a dependency on the alignment of the
// segment as a whole. We're not currently declaring multiple segments,
// so we can't use .align without generating complaints.
int fillVal = Helper.CheckRangeHoldsSingleValue(data, offset, length);
if (fillVal >= 0) {
// treat same as Fill
if (fillVal >= 0 && (length > 1 || fillVal == 0x00)) {
// If multi-byte, or single byte and zero, treat same as Fill.
opcodeStr = sDataOpNames.Fill;
operandStr = length + "," + formatter.FormatHexValue(fillVal, 2);
} else {

View File

@ -130,6 +130,7 @@ namespace SourceGen.AsmGen {
//DefineBigData4
{ "Fill", "ds" },
{ "Dense", "hex" },
{ "Uninit", "ds" },
//Junk
//Align
{ "StrGeneric", "asc" },
@ -312,6 +313,7 @@ namespace SourceGen.AsmGen {
opcodeStr = operandStr = null;
OutputDenseHex(offset, length, labelStr, commentStr);
break;
case FormatDescriptor.Type.Uninit:
case FormatDescriptor.Type.Junk:
int fillVal = Helper.CheckRangeHoldsSingleValue(data, offset, length);
if (fillVal >= 0) {
@ -324,6 +326,12 @@ namespace SourceGen.AsmGen {
} else {
operandStr = "\\," + formatter.FormatHexValue(fillVal, 2);
}
} else if (length == 1 && fillVal != 0x00) {
// Single-byte HEX looks better than "ds 1,$xx", and will match up
// with adjacent multi-byte junk/uninit.
multiLine = true;
opcodeStr = operandStr = null;
OutputDenseHex(offset, length, labelStr, commentStr);
} else {
if (fillVal == 0) {
operandStr = length.ToString();

View File

@ -155,6 +155,7 @@ namespace SourceGen.AsmGen {
//DefineBigData4
{ "Fill", ".fill" },
{ "Dense", ".byte" }, // not really dense, just comma-separated bytes
{ "Uninit", ".fill" },
//Junk
{ "Align", ".align" },
{ "StrGeneric", ".text" },
@ -552,16 +553,20 @@ namespace SourceGen.AsmGen {
opcodeStr = operandStr = null;
OutputDenseHex(offset, length, labelStr, commentStr);
break;
case FormatDescriptor.Type.Uninit:
// TODO: use the special syntax for uninit byte/word/dword if possible.
case FormatDescriptor.Type.Junk:
bool canAlign = (dfd.FormatType == FormatDescriptor.Type.Junk);
int fillVal = Helper.CheckRangeHoldsSingleValue(data, offset, length);
if (fillVal >= 0 && GenCommon.CheckJunkAlign(offset, dfd, Project.AddrMap)) {
if (canAlign && fillVal >= 0 &&
GenCommon.CheckJunkAlign(offset, dfd, Project.AddrMap)) {
// .align <expression>[, <fill>]
opcodeStr = sDataOpNames.Align;
int alignVal = 1 << FormatDescriptor.AlignmentToPower(dfd.FormatSubType);
operandStr = alignVal.ToString() +
"," + formatter.FormatHexValue(fillVal, 2);
} else if (fillVal >= 0) {
// treat same as Fill
} else if (fillVal >= 0 && (length > 1 || fillVal == 0x00)) {
// If multi-byte, or single byte and zero, treat same as Fill.
opcodeStr = sDataOpNames.Fill;
operandStr = length + "," + formatter.FormatHexValue(fillVal, 2);
} else {

View File

@ -1271,6 +1271,8 @@ namespace SourceGen {
return FormatDescriptor.Type.StringDci;
case DataType.Fill:
return FormatDescriptor.Type.Fill;
case DataType.Uninit:
return FormatDescriptor.Type.Uninit;
case DataType.Dense:
return FormatDescriptor.Type.Dense;
default:

View File

@ -1648,7 +1648,7 @@ namespace SourceGen {
if (mAnattribs[symOffset].Symbol != null &&
mAnattribs[symOffset].Symbol.Label == dfd.SymbolRef.Label) {
Messages.Add(new MessageList.MessageEntry(
MessageList.MessageEntry.SeverityLevel.Warning,
MessageList.MessageEntry.SeverityLevel.Error,
offset,
MessageList.MessageEntry.MessageType.NonAddrLabelRef,
dfd.SymbolRef.Label,
@ -1766,7 +1766,8 @@ namespace SourceGen {
offset += attr.Length;
if (attr.DataDescriptor != null &&
attr.DataDescriptor.FormatType == FormatDescriptor.Type.Junk) {
(attr.DataDescriptor.FormatType == FormatDescriptor.Type.Uninit ||
attr.DataDescriptor.FormatType == FormatDescriptor.Type.Junk)) {
ByteCounts.JunkByteCount += attr.Length;
} else {
ByteCounts.DataByteCount += attr.Length;

View File

@ -61,6 +61,7 @@ namespace SourceGen {
Dense, // raw data, represented as compactly as possible
Fill, // fill memory with a value
Uninit, // uninitialized data storage area
Junk // contents of memory are not interesting
}
@ -91,6 +92,8 @@ namespace SourceGen {
// Fill; no sub-types
// Uninit; no sub-types
// Junk; data may exist for alignment purposes. Sub-type indicates boundary.
// (SubType=None indicates no alignment)
Align2, // must be consecutive ascending powers of 2
@ -497,6 +500,9 @@ namespace SourceGen {
case Type.Fill:
retstr += "fill";
break;
case Type.Uninit:
retstr += "uninitialized data";
break;
case Type.Junk:
retstr += "unaligned junk";
break;

View File

@ -78,6 +78,7 @@ namespace SourceGen {
public string DefineBigData4 { get; private set; }
public string Fill { get; private set; }
public string Dense { get; private set; }
public string Uninit { get; private set; }
public string Junk { get; private set; }
public string Align { get; private set; }
public string StrGeneric { get; private set; }
@ -129,6 +130,7 @@ namespace SourceGen {
a.DefineBigData4 == b.DefineBigData4 &&
a.Fill == b.Fill &&
a.Dense == b.Dense &&
a.Uninit == b.Uninit &&
a.Junk == b.Junk &&
a.Align == b.Align &&
a.StrGeneric == b.StrGeneric &&
@ -242,6 +244,7 @@ namespace SourceGen {
{ "DefineBigData4", ".dbd4" },
{ "Fill", ".fill" },
{ "Dense", ".bulk" },
{ "Uninit", ".ds" },
{ "Junk", ".junk" },
{ "Align", ".align" },
@ -275,6 +278,7 @@ namespace SourceGen {
case FormatDescriptor.Type.NumericLE:
case FormatDescriptor.Type.NumericBE:
case FormatDescriptor.Type.Fill:
case FormatDescriptor.Type.Uninit:
case FormatDescriptor.Type.Junk:
return 1;
case FormatDescriptor.Type.Dense: {
@ -356,6 +360,10 @@ namespace SourceGen {
po.Opcode = opNames.Fill;
po.Operand = length + "," + formatter.FormatHexValue(data[offset], 2);
break;
case FormatDescriptor.Type.Uninit:
po.Opcode = opNames.Uninit;
po.Operand = length.ToString();
break;
case FormatDescriptor.Type.Junk:
if (dfd.FormatSubType != FormatDescriptor.SubType.None) {
po.Opcode = opNames.Align;

View File

@ -185,7 +185,7 @@ limitations under the License.
<system:String x:Key="str_ScanC64ScreenCode">C64 Screen Code</system:String>
<system:String x:Key="str_SetupSystemSummaryFmt">{1} CPU @ {2} MHz</system:String>
<system:String x:Key="str_ShowCol">Show</system:String>
<system:String x:Key="str_StatusByteCountFmt">{0:F1}KB ({1:F1}% code, {2:F1}% data, {3:F1}% junk)</system:String>
<system:String x:Key="str_StatusByteCountFmt">{0:F1}KB ({1:F1}% code, {2:F1}% data, {3:F1}% uninit/junk)</system:String>
<system:String x:Key="str_StatusReady">Ready</system:String>
<system:String x:Key="str_StrVfyDciMixedData">DCI string has mixed data</system:String>
<system:String x:Key="str_StrVfyDciNotTerminated">DCI string not terminated</system:String>

View File

@ -1,8 +1,8 @@
### 6502bench SourceGen dis65 v1.0 ###
{
"_ContentVersion":3,
"FileDataLength":1200,
"FileDataCrc32":1114187983,
"_ContentVersion":5,
"FileDataLength":1227,
"FileDataCrc32":516168842,
"ProjectProps":{
"CpuName":"6502",
"IncludeUndocumentedInstr":false,
@ -14,7 +14,9 @@
"DefaultTextScanMode":"LowHighAscii",
"MinCharsForString":4,
"SeekNearbyTargets":true,
"SmartPlpHandling":true},
"UseRelocData":false,
"SmartPlpHandling":true,
"SmartPlbHandling":true},
"PlatformSymbolFileIdentifiers":[],
"ExtensionScriptFileIdentifiers":["PROJ:20000-numeric-types.cs"],
@ -23,15 +25,24 @@
"AddressMap":[{
"Offset":0,
"Addr":4096},
"Addr":4096,
"Length":-1024,
"PreLabel":"",
"IsRelative":false},
{
"Offset":1032,
"Addr":5128},
"Addr":5128,
"Length":-1024,
"PreLabel":"",
"IsRelative":false},
{
"Offset":1048,
"Addr":5160}],
"Addr":5160,
"Length":-1024,
"PreLabel":"",
"IsRelative":false}],
"TypeHints":[{
"Low":0,
"High":0,
@ -230,6 +241,60 @@
"Length":1,
"Format":"NumericLE",
"SubFormat":"Binary",
"SymbolRef":null},
"1200":{
"Length":1,
"Format":"Uninit",
"SubFormat":"None",
"SymbolRef":null},
"1201":{
"Length":2,
"Format":"Uninit",
"SubFormat":"None",
"SymbolRef":null},
"1203":{
"Length":3,
"Format":"Uninit",
"SubFormat":"None",
"SymbolRef":null},
"1206":{
"Length":4,
"Format":"Uninit",
"SubFormat":"None",
"SymbolRef":null},
"1210":{
"Length":1,
"Format":"Uninit",
"SubFormat":"None",
"SymbolRef":null},
"1211":{
"Length":2,
"Format":"Uninit",
"SubFormat":"None",
"SymbolRef":null},
"1213":{
"Length":3,
"Format":"Uninit",
"SubFormat":"None",
"SymbolRef":null},
"1216":{
"Length":4,
"Format":"Uninit",
"SubFormat":"None",
"SymbolRef":null},
"1221":{
"Length":5,
"Format":"Uninit",
"SubFormat":"None",
"SymbolRef":null}},
"LvTables":{
@ -245,4 +310,10 @@
"VisualizationAnimations":[],
"VisualizationSets":{
"1160":{
"Tags":["vis000488"]}}}
"Tags":["vis000488"]}},
"RelocList":{
},
"DbrValues":{
}}

View File

@ -88,4 +88,15 @@ L14A8 .fill 8,$8b
.fill 8,$8c
.byte %10001100
.fill 7,$8c
.byte $90
.byte $92,$91
.byte $95,$94,$93
.byte $99,$98,$97,$96
.fill 1,$00
.fill 2,$00
.fill 3,$00
.fill 4,$00
.byte $80
.fill 5,$00
.byte $80
.here

View File

@ -84,4 +84,15 @@ L14A8 !fill 8,$8b
!fill 8,$8c
!byte %10001100
!fill 7,$8c
!hex 90
!hex 9291
!hex 959493
!hex 99989796
!fill 1,$00
!fill 2,$00
!fill 3,$00
!fill 4,$00
!byte $80
!fill 5,$00
!byte $80
}

View File

@ -85,3 +85,14 @@ L14A8: .res 8,$8b
.res 8,$8c
.byte %10001100
.res 7,$8c
.byte $90
.byte $92,$91
.byte $95,$94,$93
.byte $99,$98,$97,$96
.res 1,$00
.res 2,$00
.res 3,$00
.res 4,$00
.byte $80
.res 5,$00
.byte $80

View File

@ -82,3 +82,14 @@ L14A8 ds 8,$8b
ds 8,$8c
dfb %10001100
ds 7,$8c
hex 90
hex 9291
hex 959493
hex 99989796
ds 1
ds 2
ds 3
ds 4
dfb $80
ds 5
dfb $80

View File

@ -71,3 +71,16 @@ dref ds 16,$85 ;has a data reference
ds 16,$8a ;EDIT: add visualization
cref ds 16,$8b ;has a code reference
ds 16,$8c ;EDIT: format byte as binary
; Some uninitialized data for the "uninit" op.
dfb $90
dw $9192
adr $939495
adrl $96979899
ds 1
ds 2
ds 3
ds 4
dfb $80
ds 5
dfb $80

View File

@ -661,7 +661,12 @@ limitations under the License.
VerticalAlignment="Center" Margin="{StaticResource TBS}"
Text=".placeho" MaxLength="12"
FontFamily="{StaticResource GeneralMonoFont}"/>
<!-- TODO: add uninitialized data op -->
<TextBlock Grid.Column="2" Grid.Row="4" Text="Uninitialized:"
HorizontalAlignment="Right" VerticalAlignment="Center"/>
<TextBox Name="uninitTextBox" Grid.Column="3" Grid.Row="4"
VerticalAlignment="Center" Margin="{StaticResource TBS}"
Text=".placeho" MaxLength="12"
FontFamily="{StaticResource GeneralMonoFont}"/>
<TextBlock Grid.Column="4" Grid.Row="4" Text="Junk:"
HorizontalAlignment="Right" VerticalAlignment="Center"/>
<TextBox Name="junkTextBox" Grid.Column="5" Grid.Row="4"

View File

@ -1191,6 +1191,7 @@ namespace SourceGen.WpfGui {
new TextBoxPropertyMap(defineBigData2TextBox, "DefineBigData2"),
new TextBoxPropertyMap(fillTextBox, "Fill"),
new TextBoxPropertyMap(denseTextBox, "Dense"),
new TextBoxPropertyMap(uninitTextBox, "Uninit"),
new TextBoxPropertyMap(junkTextBox, "Junk"),
new TextBoxPropertyMap(alignTextBox, "Align"),
new TextBoxPropertyMap(strGenericTextBox, "StrGeneric"),

View File

@ -143,6 +143,8 @@ limitations under the License.
</StackPanel>
<RadioButton Name="radioFill" GroupName="Main" Content="Area _filled with value" Margin="0,4,0,0"
Checked="MainGroup_CheckedChanged"/>
<RadioButton Name="radioUninit" GroupName="Main" Content="Uninitialized data" Margin="0,4,0,0"
Checked="MainGroup_CheckedChanged"/>
<StackPanel Orientation="Horizontal" Margin="0,4,0,0">
<RadioButton Name="radioJunk" GroupName="Main" Content="_Junk bytes, end aligned to"
Checked="MainGroup_CheckedChanged"/>

View File

@ -507,7 +507,7 @@ namespace SourceGen.WpfGui {
// Check for run of bytes (2 or more of the same thing). Remember that
// we check this one region at a time, and each region could have different
// bytes, but so long as the bytes are all the same within a region we're good.
// bytes, but so long as the bytes are all the same within each region we're good.
if (radioFill.IsEnabled && count > 1 &&
DataAnalysis.RecognizeRun(mFileData, rng.Low, rng.High) == count) {
// LGTM
@ -897,6 +897,9 @@ namespace SourceGen.WpfGui {
case FormatDescriptor.Type.Fill:
preferredFormat = radioFill;
break;
case FormatDescriptor.Type.Uninit:
preferredFormat = radioUninit;
break;
case FormatDescriptor.Type.Junk:
preferredFormat = radioJunk;
break;
@ -1070,6 +1073,8 @@ namespace SourceGen.WpfGui {
type = FormatDescriptor.Type.Dense;
} else if (radioFill.IsChecked == true) {
type = FormatDescriptor.Type.Fill;
} else if (radioUninit.IsChecked == true) {
type = FormatDescriptor.Type.Uninit;
} else if (radioJunk.IsChecked == true) {
type = FormatDescriptor.Type.Junk;
JunkAlignmentItem comboItem = (JunkAlignmentItem)junkAlignComboBox.SelectedItem;

View File

@ -682,6 +682,8 @@ namespace SourceGen.WpfGui {
case FormatDescriptor.Type.StringDci:
case FormatDescriptor.Type.Dense:
case FormatDescriptor.Type.Fill:
case FormatDescriptor.Type.Uninit:
case FormatDescriptor.Type.Junk:
default:
// Unexpected; used to be data?
break;

View File

@ -252,9 +252,14 @@ the data file, each address will be assigned a label.</p>
<p>The "Bulk Data" items can represent large chunks of data compactly.
The "fill" option is only available if all selected bytes have the
same value.
If a region of bytes is irrelevant, perhaps used only as padding, you
can mark it as "junk". If it appears to be adding bytes to reach a
same value.</p>
<p>If a region of bytes is used for data storage, but the initial values
don't matter, you can mark it as "uninitialized data". (The code
generated will usually use an initialized bulk data directive rather
than a "leave space" directive, because SourceGen wants to guarantee
that the assembled binary matches the original.)</p>
<p>If a region of bytes is irrelevant, e.g. dead code or padding,
you can mark it as "junk". If it appears to be adding bytes to reach a
power-of-two address boundary, you can designate it as an alignment
directive. If you have multiple regions selected, only options that
work for all regions will be shown.</p>

View File

@ -899,6 +899,9 @@ code generator figure out the implementation details.</p>
the assembler that the width has changed.</li>
<li>.DBANK - specifies what value the Data Bank Register holds
(65816 only). Used when matching operands to labels.</li>
<li>.DS - identifies space set aside for variable storage. The storage
is initialized by the program before first use, so the values
in the binary don't actually matter.</li>
<li>.JUNK - indicates that the data in a range of bytes is irrelevant.
(When generating sources, this will become .FILL or .BULK
depending on the contents of the memory region and the assembler's

View File

@ -84,6 +84,10 @@ saved in the application settings file.</p>
If you hover your mouse over them, a tooltip with an explanation will
appear.</p>
<p>A status bar at the bottom displays a summary of the amount of
code, data, and uninitialized data (variable storage or junk) found
in the program. These values are updated as you work.</p>
<h3><a name="code-list">Code List</a></h3>