cc65/src/cc65/codeseg.c

1749 lines
54 KiB
C

/*****************************************************************************/
/* */
/* codeseg.c */
/* */
/* Code segment structure */
/* */
/* */
/* */
/* (C) 2001-2011, Ullrich von Bassewitz */
/* Roemerstrasse 52 */
/* D-70794 Filderstadt */
/* EMail: uz@cc65.org */
/* */
/* */
/* This software is provided 'as-is', without any expressed or implied */
/* warranty. In no event will the authors be held liable for any damages */
/* arising from the use of this software. */
/* */
/* Permission is granted to anyone to use this software for any purpose, */
/* including commercial applications, and to alter it and redistribute it */
/* freely, subject to the following restrictions: */
/* */
/* 1. The origin of this software must not be misrepresented; you must not */
/* claim that you wrote the original software. If you use this software */
/* in a product, an acknowledgment in the product documentation would be */
/* appreciated but is not required. */
/* 2. Altered source versions must be plainly marked as such, and must not */
/* be misrepresented as being the original software. */
/* 3. This notice may not be removed or altered from any source */
/* distribution. */
/* */
/*****************************************************************************/
#include <string.h>
#include <ctype.h>
/* common */
#include "chartype.h"
#include "check.h"
#include "debugflag.h"
#include "global.h"
#include "hashfunc.h"
#include "strbuf.h"
#include "strutil.h"
#include "xmalloc.h"
/* cc65 */
#include "asmlabel.h"
#include "codeent.h"
#include "codeinfo.h"
#include "codeseg.h"
#include "datatype.h"
#include "error.h"
#include "global.h"
#include "ident.h"
#include "output.h"
#include "symentry.h"
/*****************************************************************************/
/* Helper functions */
/*****************************************************************************/
static void CS_PrintFunctionHeader (const CodeSeg* S)
/* Print a comment with the function signature to the output file */
{
/* Get the associated function */
const SymEntry* Func = S->Func;
/* If this is a global code segment, do nothing */
if (Func) {
WriteOutput ("; ---------------------------------------------------------------\n"
"; ");
PrintFuncSig (OutputFile, Func->Name, Func->Type);
WriteOutput ("\n"
"; ---------------------------------------------------------------\n"
"\n");
}
}
static void CS_MoveLabelsToEntry (CodeSeg* S, CodeEntry* E)
/* Move all labels from the label pool to the given entry and remove them
** from the pool.
*/
{
/* Transfer the labels if we have any */
unsigned I;
unsigned LabelCount = CollCount (&S->Labels);
for (I = 0; I < LabelCount; ++I) {
/* Get the label */
CodeLabel* L = CollAt (&S->Labels, I);
/* Attach it to the entry */
CE_AttachLabel (E, L);
}
/* Delete the transfered labels */
CollDeleteAll (&S->Labels);
}
static void CS_MoveLabelsToPool (CodeSeg* S, CodeEntry* E)
/* Move the labels of the code entry E to the label pool of the code segment */
{
unsigned LabelCount = CE_GetLabelCount (E);
while (LabelCount--) {
CodeLabel* L = CE_GetLabel (E, LabelCount);
L->Owner = 0;
CollAppend (&S->Labels, L);
}
CollDeleteAll (&E->Labels);
}
static CodeLabel* CS_FindLabel (CodeSeg* S, const char* Name, unsigned Hash)
/* Find the label with the given name. Return the label or NULL if not found */
{
/* Get the first hash chain entry */
CodeLabel* L = S->LabelHash[Hash];
/* Search the list */
while (L) {
if (strcmp (Name, L->Name) == 0) {
/* Found */
break;
}
L = L->Next;
}
return L;
}
static CodeLabel* CS_NewCodeLabel (CodeSeg* S, const char* Name, unsigned Hash)
/* Create a new label and insert it into the label hash table */
{
/* Create a new label */
CodeLabel* L = NewCodeLabel (Name, Hash);
/* Enter the label into the hash table */
L->Next = S->LabelHash[L->Hash];
S->LabelHash[L->Hash] = L;
/* Return the new label */
return L;
}
static void CS_RemoveLabelFromHash (CodeSeg* S, CodeLabel* L)
/* Remove the given code label from the hash list */
{
/* Get the first entry in the hash chain */
CodeLabel* List = S->LabelHash[L->Hash];
CHECK (List != 0);
/* First, remove the label from the hash chain */
if (List == L) {
/* First entry in hash chain */
S->LabelHash[L->Hash] = L->Next;
} else {
/* Must search through the chain */
while (List->Next != L) {
/* If we've reached the end of the chain, something is *really* wrong */
CHECK (List->Next != 0);
/* Next entry */
List = List->Next;
}
/* The next entry is the one, we have been searching for */
List->Next = L->Next;
}
}
static CodeLabel* PickRefLab (CodeEntry* E)
/* Pick a reference label and move it to index 0 in E. */
{
unsigned I;
unsigned LabelCount = CE_GetLabelCount (E);
CHECK (LabelCount > 0);
/* Use either the first one as reference label, or a label with a Ref that has no JumpTo.
** This is a hack to partially work around #1211. Refs with no JumpTo are labels used
** in data segments. (They are not tracked.) If a data segment is the only reference,
** the label will be pruned away, but the data reference will remain, causing linking to fail.
*/
CodeLabel* L0 = CE_GetLabel (E, 0);
for (I = 1; I < LabelCount; ++I) {
unsigned J;
CodeLabel* L = CE_GetLabel (E, I);
unsigned RefCount = CL_GetRefCount (L);
for (J = 0; J < RefCount; ++J) {
CodeEntry* EJ = CL_GetRef (L, J);
if (EJ->JumpTo == NULL) {
/* Move it to the beginning since it's simpler to handle the removal this way. */
CE_ReplaceLabel (E, L, 0);
CE_ReplaceLabel (E, L0, I);
return L;
}
}
}
return L0;
}
/*****************************************************************************/
/* Functions for parsing instructions */
/*****************************************************************************/
static const char* SkipSpace (const char* S)
/* Skip white space and return an updated pointer */
{
while (IsSpace (*S)) {
++S;
}
return S;
}
static const char* ReadToken (const char* L, const char* Term,
char* Buf, unsigned BufSize)
/* Read the next token into Buf, return the updated line pointer. The
** token is terminated by one of the characters given in term.
*/
{
/* Read/copy the token */
unsigned I = 0;
unsigned ParenCount = 0;
while (*L && (ParenCount > 0 || strchr (Term, *L) == 0)) {
if (I < BufSize-1) {
Buf[I] = *L;
} else if (I == BufSize-1) {
/* Cannot store this character, this is an input error (maybe
** identifier too long or similar).
*/
Error ("ASM code error: syntax error");
}
++I;
if (*L == ')') {
--ParenCount;
} else if (*L == '(') {
++ParenCount;
}
++L;
}
/* Terminate the buffer contents */
Buf[I] = '\0';
/* Return the updated line pointer */
return L;
}
static CodeEntry* ParseInsn (CodeSeg* S, LineInfo* LI, const char* L)
/* Parse an instruction nnd generate a code entry from it. If the line contains
** errors, output an error message and return NULL.
** For simplicity, we don't accept the broad range of input a "real" assembler
** does. The instruction and the argument are expected to be separated by
** white space, for example.
*/
{
char Mnemo[IDENTSIZE+10];
const OPCDesc* OPC;
am_t AM = 0; /* Initialize to keep gcc silent */
char Arg[IDENTSIZE+10];
char Reg;
CodeEntry* E;
CodeLabel* Label;
const char* ArgBase = Arg;
int IsLabel = 0;
/* Read the first token and skip white space after it */
L = SkipSpace (ReadToken (L, " \t:", Mnemo, sizeof (Mnemo)));
/* Check if we have a label */
if (*L == ':') {
/* Skip the colon and following white space */
L = SkipSpace (L+1);
/* Add the label */
CS_AddLabel (S, Mnemo);
/* If we have reached end of line, bail out, otherwise a mnemonic
** may follow.
*/
if (*L == '\0') {
return 0;
}
L = SkipSpace (ReadToken (L, " \t", Mnemo, sizeof (Mnemo)));
}
/* Try to find the opcode description for the mnemonic */
OPC = FindOP65 (Mnemo);
/* If we didn't find the opcode, print an error and bail out */
if (OPC == 0) {
Error ("ASM code error: %s is not a valid mnemonic", Mnemo);
return 0;
}
/* Get the addressing mode */
Arg[0] = '\0';
switch (*L) {
case '\0':
/* Implicit or accu */
if (OPC->Info & OF_NOIMP) {
AM = AM65_ACC;
} else {
AM = AM65_IMP;
}
break;
case '#':
/* Immidiate */
StrCopy (Arg, sizeof (Arg), L+1);
AM = AM65_IMM;
break;
case '(':
/* Indirect */
L = ReadToken (L+1, ",)", Arg, sizeof (Arg));
/* Check for errors */
if (*L == '\0') {
Error ("ASM code error: syntax error");
return 0;
}
/* Check the different indirect modes */
if (*L == ',') {
/* Expect zp x indirect */
L = SkipSpace (L+1);
if (toupper (*L) != 'X') {
Error ("ASM code error: 'X' expected");
return 0;
}
L = SkipSpace (L+1);
if (*L != ')') {
Error ("ASM code error: ')' expected");
return 0;
}
L = SkipSpace (L+1);
if (*L != '\0') {
Error ("ASM code error: syntax error");
return 0;
}
AM = AM65_ZPX_IND;
} else if (*L == ')') {
/* zp indirect or zp indirect, y */
L = SkipSpace (L+1);
if (*L == ',') {
L = SkipSpace (L+1);
if (toupper (*L) != 'Y') {
Error ("ASM code error: 'Y' expected");
return 0;
}
L = SkipSpace (L+1);
if (*L != '\0') {
Error ("ASM code error: syntax error");
return 0;
}
AM = AM65_ZP_INDY;
} else if (*L == '\0') {
AM = AM65_ZP_IND;
} else {
Error ("ASM code error: syntax error");
return 0;
}
}
break;
case 'a':
case 'A':
/* Accumulator? */
if (L[1] == '\0') {
AM = AM65_ACC;
break;
}
/* FALLTHROUGH */
default:
/* Absolute, maybe indexed */
L = ReadToken (L, ",", Arg, sizeof (Arg));
if (*L == '\0') {
/* Absolute, zeropage or branch */
if ((OPC->Info & OF_BRA) != 0) {
/* Branch */
AM = AM65_BRA;
} else if (IsZPArg (Arg)) {
AM = AM65_ZP;
} else {
/* Check for subroutine call to local label */
if ((OPC->Info & OF_CALL) && IsLocalLabelName (Arg)) {
Error ("ASM code error: "
"Cannot use local label '%s' in subroutine call",
Arg);
}
AM = AM65_ABS;
}
} else if (*L == ',') {
/* Indexed */
L = SkipSpace (L+1);
if (*L == '\0') {
Error ("ASM code error: syntax error");
return 0;
} else {
Reg = toupper (*L);
L = SkipSpace (L+1);
if (Reg == 'X') {
if (IsZPArg (Arg)) {
AM = AM65_ZPX;
} else {
AM = AM65_ABSX;
}
} else if (Reg == 'Y') {
/* On 6502 only LDX and STX support AM65_ZPY.
** We just play it simple and safe for now.
*/
AM = AM65_ABSY;
} else {
Error ("ASM code error: syntax error");
return 0;
}
if (*L != '\0') {
Error ("ASM code error: syntax error");
return 0;
}
}
}
break;
}
/* We do now have the addressing mode in AM. Allocate a new CodeEntry
** structure and half-initialize it. We'll set the argument and the label
** later.
*/
E = NewCodeEntry (OPC->OPC, AM, Arg, 0, LI);
/* If the instruction is a branch or accessing memory data, check if for
** the argument could refer to a label. If it does but the label does not
** exist yet, generate it. This may lead to unused labels (if the label
** is actually an external one) which are removed by the CS_MergeLabels
** function later.
*/
if ((E->Info & OF_CALL) == 0 &&
(E->ArgInfo & AIF_HAS_NAME) != 0) {
ArgBase = E->ArgBase;
IsLabel = (E->ArgInfo & AIF_LOCAL) != 0;
}
if (AM == AM65_BRA || IsLabel) {
/* Generate the hash over the label, then search for the label */
unsigned Hash = HashStr (ArgBase) % CS_LABEL_HASH_SIZE;
Label = CS_FindLabel (S, ArgBase, Hash);
/* If we don't have the label, it's a forward ref - create it unless
** it's an external function.
*/
if (Label == 0 && (OPC->OPC != OP65_JMP || IsLabel)) {
/* Generate a new label */
Label = CS_NewCodeLabel (S, ArgBase, Hash);
}
if (Label != 0) {
/* Assign the jump */
CL_AddRef (Label, E);
}
}
/* Return the new code entry */
return E;
}
/*****************************************************************************/
/* Code */
/*****************************************************************************/
CodeSeg* NewCodeSeg (const char* SegName, SymEntry* Func)
/* Create a new code segment, initialize and return it */
{
unsigned I;
const Type* RetType;
/* Allocate memory */
CodeSeg* S = xmalloc (sizeof (CodeSeg));
/* Initialize the fields */
S->SegName = xstrdup (SegName);
S->Func = Func;
InitCollection (&S->Entries);
InitCollection (&S->Labels);
for (I = 0; I < sizeof(S->LabelHash) / sizeof(S->LabelHash[0]); ++I) {
S->LabelHash[I] = 0;
}
/* If we have a function given, get the return type of the function.
** Assume ANY return type besides void will use the A and X registers.
*/
if (S->Func && !IsTypeVoid ((RetType = GetFuncReturnType (Func->Type)))) {
if (SizeOf (RetType) == SizeOf (type_long)) {
S->ExitRegs = REG_EAX;
} else {
S->ExitRegs = REG_AX;
}
} else {
S->ExitRegs = REG_NONE;
}
/* Copy the global optimization settings */
S->Optimize = (unsigned char) IS_Get (&Optimize);
S->CodeSizeFactor = (unsigned) IS_Get (&CodeSizeFactor);
/* Return the new struct */
return S;
}
void CS_AddEntry (CodeSeg* S, struct CodeEntry* E)
/* Add an entry to the given code segment */
{
/* Transfer the labels if we have any */
CS_MoveLabelsToEntry (S, E);
/* Add the entry to the list of code entries in this segment */
CollAppend (&S->Entries, E);
}
void CS_AddVLine (CodeSeg* S, LineInfo* LI, const char* Format, va_list ap)
/* Add a line to the given code segment */
{
const char* L;
CodeEntry* E;
char Token[IDENTSIZE+10];
/* Format the line */
StrBuf Buf = STATIC_STRBUF_INITIALIZER;
SB_VPrintf (&Buf, Format, ap);
/* Skip whitespace */
L = SkipSpace (SB_GetConstBuf (&Buf));
/* Check which type of instruction we have */
E = 0; /* Assume no insn created */
switch (*L) {
case '\0':
/* Empty line, just ignore it */
break;
case ';':
/* Comment or hint, ignore it for now */
break;
case '.':
/* Control instruction */
ReadToken (L, " \t", Token, sizeof (Token));
Error ("ASM code error: Pseudo instruction '%s' not supported", Token);
break;
default:
E = ParseInsn (S, LI, L);
break;
}
/* If we have a code entry, transfer the labels and insert it */
if (E) {
CS_AddEntry (S, E);
}
/* Cleanup the string buffer */
SB_Done (&Buf);
}
void CS_AddLine (CodeSeg* S, LineInfo* LI, const char* Format, ...)
/* Add a line to the given code segment */
{
va_list ap;
va_start (ap, Format);
CS_AddVLine (S, LI, Format, ap);
va_end (ap);
}
void CS_InsertEntry (CodeSeg* S, struct CodeEntry* E, unsigned Index)
/* Insert the code entry at the index given. Following code entries will be
** moved to slots with higher indices.
*/
{
/* Insert the entry into the collection */
CollInsert (&S->Entries, E, Index);
}
void CS_DelEntry (CodeSeg* S, unsigned Index)
/* Delete an entry from the code segment. This includes moving any associated
** labels, removing references to labels and even removing the referenced labels
** if the reference count drops to zero.
** Note: Labels are moved forward if possible, that is, they are moved to the
** next insn (not the preceeding one).
*/
{
/* Get the code entry for the given index */
CodeEntry* E = CS_GetEntry (S, Index);
/* If the entry has a labels, we have to move this label to the next insn.
** If there is no next insn, move the label into the code segement label
** pool. The operation is further complicated by the fact that the next
** insn may already have a label. In that case change all reference to
** this label and delete the label instead of moving it.
*/
unsigned Count = CE_GetLabelCount (E);
if (Count > 0) {
/* The instruction has labels attached. Check if there is a next
** instruction.
*/
if (Index == CS_GetEntryCount (S)-1) {
/* No next instruction, move to the codeseg label pool */
CS_MoveLabelsToPool (S, E);
} else {
/* There is a next insn, get it */
CodeEntry* N = CS_GetEntry (S, Index+1);
/* Move labels to the next entry */
CS_MoveLabels (S, E, N);
}
}
/* If this insn references a label, remove the reference. And, if the
** the reference count for this label drops to zero, remove this label.
*/
if (E->JumpTo) {
/* Remove the reference */
CS_RemoveLabelRef (S, E);
}
/* Delete the pointer to the insn */
CollDelete (&S->Entries, Index);
/* Delete the instruction itself */
FreeCodeEntry (E);
}
void CS_DelEntries (CodeSeg* S, unsigned Start, unsigned Count)
/* Delete a range of code entries. This includes removing references to labels,
** labels attached to the entries and so on.
*/
{
/* Start deleting the entries from the rear, because this involves less
** memory moving.
*/
while (Count--) {
CS_DelEntry (S, Start + Count);
}
}
void CS_MoveEntries (CodeSeg* S, unsigned Start, unsigned Count, unsigned NewPos)
/* Move a range of entries from one position to another. Start is the index
** of the first entry to move, Count is the number of entries and NewPos is
** the index of the target entry. The entry with the index Start will later
** have the index NewPos. All entries with indices NewPos and above are
** moved to higher indices. If the code block is moved to the end of the
** current code, and if pending labels exist, these labels will get attached
** to the first instruction of the moved block (the first one after the
** current code end)
*/
{
/* Transparently handle an empty range */
if (Count == 0) {
return;
}
/* If NewPos is at the end of the code segment, move any labels from the
** label pool to the first instruction of the moved range.
*/
if (NewPos == CS_GetEntryCount (S)) {
CS_MoveLabelsToEntry (S, CS_GetEntry (S, Start));
}
/* Move the code block to the destination */
CollMoveMultiple (&S->Entries, Start, Count, NewPos);
}
struct CodeEntry* CS_GetPrevEntry (CodeSeg* S, unsigned Index)
/* Get the code entry preceeding the one with the index Index. If there is no
** preceeding code entry, return NULL.
*/
{
if (Index == 0) {
/* This is the first entry */
return 0;
} else {
/* Previous entry available */
return CollAtUnchecked (&S->Entries, Index-1);
}
}
struct CodeEntry* CS_GetNextEntry (CodeSeg* S, unsigned Index)
/* Get the code entry following the one with the index Index. If there is no
** following code entry, return NULL.
*/
{
if (Index >= CollCount (&S->Entries)-1) {
/* This is the last entry */
return 0;
} else {
/* Code entries left */
return CollAtUnchecked (&S->Entries, Index+1);
}
}
int CS_GetEntries (CodeSeg* S, struct CodeEntry** List,
unsigned Start, unsigned Count)
/* Get Count code entries into List starting at index start. Return true if
** we got the lines, return false if not enough lines were available.
*/
{
/* Check if enough entries are available */
if (Start + Count > CollCount (&S->Entries)) {
return 0;
}
/* Copy the entries */
while (Count--) {
*List++ = CollAtUnchecked (&S->Entries, Start++);
}
/* We have the entries */
return 1;
}
unsigned CS_GetEntryIndex (CodeSeg* S, struct CodeEntry* E)
/* Return the index of a code entry */
{
int Index = CollIndex (&S->Entries, E);
CHECK (Index >= 0);
return Index;
}
int CS_RangeHasLabel (CodeSeg* S, unsigned Start, unsigned Count)
/* Return true if any of the code entries in the given range has a label
** attached. If the code segment does not span the given range, check the
** possible span instead.
*/
{
unsigned EntryCount = CS_GetEntryCount(S);
/* Adjust count. We expect at least Start to be valid. */
CHECK (Start < EntryCount);
if (Start + Count > EntryCount) {
Count = EntryCount - Start;
}
/* Check each entry. Since we have validated the index above, we may
** use the unchecked access function in the loop which is faster.
*/
while (Count--) {
const CodeEntry* E = CollAtUnchecked (&S->Entries, Start++);
if (CE_HasLabel (E)) {
return 1;
}
}
/* No label in the complete range */
return 0;
}
CodeLabel* CS_AddLabel (CodeSeg* S, const char* Name)
/* Add a code label for the next instruction to follow */
{
/* Calculate the hash from the name */
unsigned Hash = HashStr (Name) % CS_LABEL_HASH_SIZE;
/* Try to find the code label if it does already exist */
CodeLabel* L = CS_FindLabel (S, Name, Hash);
/* Did we find it? */
if (L) {
/* We found it - be sure it does not already have an owner */
if (L->Owner) {
Error ("ASM label '%s' is already defined", Name);
return L;
}
} else {
/* Not found - create a new one */
L = CS_NewCodeLabel (S, Name, Hash);
}
/* Safety. This call is quite costly, but safety is better */
if (CollIndex (&S->Labels, L) >= 0) {
Error ("ASM label '%s' is already defined", Name);
return L;
}
/* We do now have a valid label. Remember it for later */
CollAppend (&S->Labels, L);
/* Return the label */
return L;
}
CodeLabel* CS_GenLabel (CodeSeg* S, struct CodeEntry* E)
/* If the code entry E does already have a label, return it. Otherwise
** create a new label, attach it to E and return it.
*/
{
CodeLabel* L;
if (CE_HasLabel (E)) {
/* Get the label from this entry */
L = CE_GetLabel (E, 0);
} else {
/* Get a new name */
const char* Name = LocalLabelName (GetLocalLabel ());
/* Generate the hash over the name */
unsigned Hash = HashStr (Name) % CS_LABEL_HASH_SIZE;
/* Create a new label */
L = CS_NewCodeLabel (S, Name, Hash);
/* Attach this label to the code entry */
CE_AttachLabel (E, L);
}
/* Return the label */
return L;
}
void CS_DelLabel (CodeSeg* S, CodeLabel* L)
/* Remove references from this label and delete it. */
{
unsigned Count, I;
/* First, remove the label from the hash chain */
CS_RemoveLabelFromHash (S, L);
/* Remove references from insns jumping to this label */
Count = CollCount (&L->JumpFrom);
for (I = 0; I < Count; ++I) {
/* Get the insn referencing this label */
CodeEntry* E = CollAt (&L->JumpFrom, I);
/* Remove the reference */
CE_ClearJumpTo (E);
}
CollDeleteAll (&L->JumpFrom);
/* Remove the reference to the owning instruction if it has one. The
** function may be called for a label without an owner when deleting
** unfinished parts of the code. This is unfortunate since it allows
** errors to slip through.
*/
if (L->Owner) {
CollDeleteItem (&L->Owner->Labels, L);
}
/* All references removed, delete the label itself */
FreeCodeLabel (L);
}
void CS_MergeLabels (CodeSeg* S)
/* Merge code labels. That means: For each instruction, remove all labels but
** one and adjust references accordingly.
*/
{
unsigned I;
unsigned J;
/* First, remove all labels from the label symbol table that don't have an
** owner (this means that they are actually external labels but we didn't
** know that previously since they may have also been forward references).
*/
for (I = 0; I < CS_LABEL_HASH_SIZE; ++I) {
/* Get the first label in this hash chain */
CodeLabel** L = &S->LabelHash[I];
while (*L) {
if ((*L)->Owner == 0) {
/* The label does not have an owner, remove it from the chain */
CodeLabel* X = *L;
*L = X->Next;
/* Cleanup any entries jumping to this label */
for (J = 0; J < CL_GetRefCount (X); ++J) {
/* Get the entry referencing this label */
CodeEntry* E = CL_GetRef (X, J);
/* And remove the reference. Do NOT call CE_ClearJumpTo
** here, because this will also clear the label name,
** which is not what we want.
*/
E->JumpTo = 0;
}
/* Print some debugging output */
if (Debug) {
printf ("Removing unused global label '%s'", X->Name);
}
/* And free the label */
FreeCodeLabel (X);
} else {
/* Label is owned, point to next code label pointer */
L = &((*L)->Next);
}
}
}
/* Walk over all code entries */
for (I = 0; I < CS_GetEntryCount (S); ++I) {
CodeLabel* RefLab;
unsigned J;
/* Get a pointer to the next entry */
CodeEntry* E = CS_GetEntry (S, I);
/* If this entry has zero labels, continue with the next one */
unsigned LabelCount = CE_GetLabelCount (E);
if (LabelCount == 0) {
continue;
}
/* Pick a label to keep, all references will be moved to this one, and the other labels
** will be deleted. PickRefLab moves this to index 0.
*/
RefLab = PickRefLab (E);
/* Walk through the remaining labels and change references to these
** labels to a reference to the one and only label. Delete the labels
** that are no longer used. To increase performance, walk backwards
** through the list.
*/
for (J = LabelCount-1; J >= 1; --J) {
/* Get the next label */
CodeLabel* L = CE_GetLabel (E, J);
/* Move all references from this label to the reference label */
CL_MoveRefs (L, RefLab);
/* Remove the label completely. */
CS_DelLabel (S, L);
}
/* The reference label is the only remaining label. Check if there
** are any references to this label, and delete it if this is not
** the case.
*/
if (CollCount (&RefLab->JumpFrom) == 0) {
/* Delete the label */
CS_DelLabel (S, RefLab);
}
}
}
void CS_MoveLabels (CodeSeg* S, struct CodeEntry* Old, struct CodeEntry* New)
/* Move all labels from Old to New. The routine will move the labels itself
** if New does not have any labels, and move references if there is at least
** a label for new. If references are moved, the old label is deleted
** afterwards.
*/
{
/* Get the number of labels to move */
unsigned OldLabelCount = CE_GetLabelCount (Old);
/* Does the new entry have itself a label? */
if (CE_HasLabel (New)) {
/* The new entry does already have a label - move references */
CodeLabel* NewLabel = CE_GetLabel (New, 0);
while (OldLabelCount--) {
/* Get the next label */
CodeLabel* OldLabel = CE_GetLabel (Old, OldLabelCount);
/* Move references */
CL_MoveRefs (OldLabel, NewLabel);
/* Delete the label */
CS_DelLabel (S, OldLabel);
}
} else {
/* The new entry does not have a label, just move them */
while (OldLabelCount--) {
/* Move the label to the new entry */
CE_MoveLabel (CE_GetLabel (Old, OldLabelCount), New);
}
}
}
void CS_RemoveLabelRef (CodeSeg* S, struct CodeEntry* E)
/* Remove the reference between E and the label it jumps to. The reference
** will be removed on both sides and E->JumpTo will be 0 after that. If
** the reference was the only one for the label, the label will get
** deleted.
*/
{
/* Get a pointer to the label and make sure it exists */
CodeLabel* L = E->JumpTo;
CHECK (L != 0);
/* Delete the entry from the label */
CollDeleteItem (&L->JumpFrom, E);
/* The entry jumps no longer to L */
CE_ClearJumpTo (E);
/* If there are no more references, delete the label */
if (CollCount (&L->JumpFrom) == 0) {
CS_DelLabel (S, L);
}
}
void CS_MoveLabelRef (CodeSeg* S, struct CodeEntry* E, CodeLabel* L)
/* Change the reference of E to L instead of the current one. If this
** was the only reference to the old label, the old label will get
** deleted.
*/
{
/* Get the old label */
CodeLabel* OldLabel = E->JumpTo;
/* Be sure that code entry references a label */
PRECONDITION (OldLabel != 0);
/* Delete the entry from the label */
CollDeleteItem (&OldLabel->JumpFrom, E);
/* If there are no more references, delete the label */
if (CollCount (&OldLabel->JumpFrom) == 0) {
CS_DelLabel (S, OldLabel);
}
/* Use the new label */
CL_AddRef (L, E);
}
void CS_DelCodeRange (CodeSeg* S, unsigned First, unsigned Last)
/* Delete all entries between first and last, both inclusive. The function
** can only handle basic blocks (First is the only entry, Last the only exit)
** and no open labels. It will call FAIL if any of these preconditions are
** violated.
*/
{
unsigned I;
CodeEntry* FirstEntry;
/* Do some sanity checks */
CHECK (First <= Last && Last < CS_GetEntryCount (S));
/* If Last is actually the last insn, call CS_DelCodeAfter instead, which
** is more flexible in this case.
*/
if (Last == CS_GetEntryCount (S) - 1) {
CS_DelCodeAfter (S, First);
return;
}
/* Get the first entry and check if it has any labels. If it has, move
** them to the insn following Last. If Last is the last insn of the code
** segment, make them ownerless and move them to the label pool.
*/
FirstEntry = CS_GetEntry (S, First);
if (CE_HasLabel (FirstEntry)) {
/* Get the entry following last */
CodeEntry* FollowingEntry = CS_GetNextEntry (S, Last);
if (FollowingEntry) {
/* There is an entry after Last - move the labels */
CS_MoveLabels (S, FirstEntry, FollowingEntry);
} else {
/* Move the labels to the pool and clear the owner pointer */
CS_MoveLabelsToPool (S, FirstEntry);
}
}
/* First pass: Delete all references to labels. If the reference count
** for a label drops to zero, delete it.
*/
for (I = Last; I >= First; --I) {
/* Get the next entry */
CodeEntry* E = CS_GetEntry (S, I);
/* Check if this entry has a label reference */
if (E->JumpTo) {
/* If the label is a label in the label pool, this is an error */
CodeLabel* L = E->JumpTo;
CHECK (CollIndex (&S->Labels, L) < 0);
/* Remove the reference to the label */
CS_RemoveLabelRef (S, E);
}
}
/* Second pass: Delete the instructions. If a label attached to an
** instruction still has references, it must be references from outside
** the deleted area, which is an error.
*/
for (I = Last; I >= First; --I) {
/* Get the next entry */
CodeEntry* E = CS_GetEntry (S, I);
/* Check if this entry has a label attached */
CHECK (!CE_HasLabel (E));
/* Delete the pointer to the entry */
CollDelete (&S->Entries, I);
/* Delete the entry itself */
FreeCodeEntry (E);
}
}
void CS_DelCodeAfter (CodeSeg* S, unsigned Last)
/* Delete all entries including the given one */
{
/* Get the number of entries in this segment */
unsigned Count = CS_GetEntryCount (S);
/* First pass: Delete all references to labels. If the reference count
** for a label drops to zero, delete it.
*/
unsigned C = Count;
while (Last < C--) {
/* Get the next entry */
CodeEntry* E = CS_GetEntry (S, C);
/* Check if this entry has a label reference */
if (E->JumpTo) {
/* If the label is a label in the label pool and this is the last
** reference to the label, remove the label from the pool.
*/
CodeLabel* L = E->JumpTo;
int Index = CollIndex (&S->Labels, L);
if (Index >= 0 && CollCount (&L->JumpFrom) == 1) {
/* Delete it from the pool */
CollDelete (&S->Labels, Index);
}
/* Remove the reference to the label */
CS_RemoveLabelRef (S, E);
}
}
/* Second pass: Delete the instructions. If a label attached to an
** instruction still has references, it must be references from outside
** the deleted area. Don't delete the label in this case, just make it
** ownerless and move it to the label pool.
*/
C = Count;
while (Last < C--) {
/* Get the next entry */
CodeEntry* E = CS_GetEntry (S, C);
/* Check if this entry has a label attached */
if (CE_HasLabel (E)) {
/* Move the labels to the pool and clear the owner pointer */
CS_MoveLabelsToPool (S, E);
}
/* Delete the pointer to the entry */
CollDelete (&S->Entries, C);
/* Delete the entry itself */
FreeCodeEntry (E);
}
}
void CS_ResetMarks (CodeSeg* S, unsigned First, unsigned Last)
/* Remove all user marks from the entries in the given range */
{
while (First <= Last) {
CE_ResetMark (CS_GetEntry (S, First++));
}
}
int CS_IsBasicBlock (CodeSeg* S, unsigned First, unsigned Last)
/* Check if the given code segment range is a basic block. That is, check if
** First is the only entrance and Last is the only exit. This means that no
** jump/branch inside the block may jump to an insn below First or after(!)
** Last, and that no insn may jump into this block from the outside.
*/
{
unsigned I;
/* Don't accept invalid ranges */
CHECK (First <= Last);
/* First pass: Walk over the range and remove all marks from the entries */
CS_ResetMarks (S, First, Last);
/* Second pass: Walk over the range checking all labels. Note: There may be
** label on the first insn which is ok.
*/
I = First + 1;
while (I <= Last) {
/* Get the next entry */
CodeEntry* E = CS_GetEntry (S, I);
/* Check if this entry has one or more labels, if so, check which
** entries jump to this label.
*/
unsigned LabelCount = CE_GetLabelCount (E);
unsigned LabelIndex;
for (LabelIndex = 0; LabelIndex < LabelCount; ++LabelIndex) {
/* Get this label */
CodeLabel* L = CE_GetLabel (E, LabelIndex);
/* Walk over all entries that jump to this label. Check for each
** of the entries if it is out of the range.
*/
unsigned RefCount = CL_GetRefCount (L);
unsigned RefIndex;
for (RefIndex = 0; RefIndex < RefCount; ++RefIndex) {
/* Get the code entry that jumps here */
CodeEntry* Ref = CL_GetRef (L, RefIndex);
/* Walk over out complete range and check if we find the
** refering entry. This is cheaper than using CS_GetEntryIndex,
** because CS_GetEntryIndex will search the complete code
** segment and not just our range.
*/
unsigned J;
for (J = First; J <= Last; ++J) {
if (Ref == CS_GetEntry (S, J)) {
break;
}
}
if (J > Last) {
/* We did not find the entry. This means that the jump to
** out code segment entry E came from outside the range,
** which in turn means that the given range is not a basic
** block.
*/
CS_ResetMarks (S, First, Last);
return 0;
}
/* If we come here, we found the entry. Mark it, so we know
** that the branch to the label is in range.
*/
CE_SetMark (Ref);
}
}
/* Next entry */
++I;
}
/* Third pass: Walk again over the range and check all branches. If we
** find a branch that is not marked, its target is not inside the range
** (since we checked all the labels in the range before).
*/
I = First;
while (I <= Last) {
/* Get the next entry */
CodeEntry* E = CS_GetEntry (S, I);
/* Check if this is a branch and if so, if it has a mark */
if (E->Info & (OF_UBRA | OF_CBRA)) {
if (!CE_HasMark (E)) {
/* No mark means not a basic block. Before bailing out, be sure
** to remove the marks from the remaining entries.
*/
CS_ResetMarks (S, I+1, Last);
return 0;
}
/* Remove the mark */
CE_ResetMark (E);
}
/* Next entry */
++I;
}
/* Done - this is a basic block */
return 1;
}
void CS_OutputPrologue (const CodeSeg* S)
/* If the given code segment is a code segment for a function, output the
** assembler prologue into the file. That is: Output a comment header, switch
** to the correct segment and enter the local function scope. If the code
** segment is global, do nothing.
*/
{
/* Get the function associated with the code segment */
SymEntry* Func = S->Func;
/* If the code segment is associated with a function, print a function
** header and enter a local scope. Be sure to switch to the correct
** segment before outputing the function label.
*/
if (Func) {
/* Get the function descriptor */
CS_PrintFunctionHeader (S);
WriteOutput (".segment\t\"%s\"\n\n.proc\t_%s", S->SegName, Func->Name);
if (IsQualNear (Func->Type)) {
WriteOutput (": near");
} else if (IsQualFar (Func->Type)) {
WriteOutput (": far");
}
WriteOutput ("\n\n");
}
}
void CS_OutputEpilogue (const CodeSeg* S)
/* If the given code segment is a code segment for a function, output the
** assembler epilogue into the file. That is: Close the local function scope.
*/
{
if (S->Func) {
WriteOutput (".endproc\n\n");
}
}
void CS_Output (CodeSeg* S)
/* Output the code segment data to the output file */
{
unsigned I;
const LineInfo* LI;
/* Get the number of entries in this segment */
unsigned Count = CS_GetEntryCount (S);
/* If the code segment is empty, bail out here */
if (Count == 0) {
return;
}
/* Generate register info */
CS_GenRegInfo (S);
/* Output the segment directive */
WriteOutput (".segment\t\"%s\"\n\n", S->SegName);
/* Output all entries, prepended by the line information if it has changed */
LI = 0;
for (I = 0; I < Count; ++I) {
/* Get the next entry */
const CodeEntry* E = CollConstAt (&S->Entries, I);
/* Check if the line info has changed. If so, output the source line
** if the option is enabled and output debug line info if the debug
** option is enabled.
*/
if (E->LI != LI) {
/* Line info has changed, remember the new line info */
LI = E->LI;
/* Add the source line as a comment. Beware: When line continuation
** was used, the line may contain newlines.
*/
if (AddSource) {
const char* L = LI->Line;
WriteOutput (";\n; ");
while (*L) {
const char* N = strchr (L, '\n');
if (N) {
/* We have a newline, just write the first part */
WriteOutput ("%.*s\n; ", (int) (N - L), L);
L = N+1;
} else {
/* No Newline, write as is */
WriteOutput ("%s\n", L);
break;
}
}
WriteOutput (";\n");
}
/* Add line debug info */
if (DebugInfo) {
WriteOutput ("\t.dbg\tline, \"%s\", %u\n",
GetActualFileName (LI), GetActualLineNum (LI));
}
}
/* Output the code */
CE_Output (E);
}
/* Prettyier formatting */
WriteOutput ("\n");
/* If debug info is enabled, terminate the last line number information */
if (DebugInfo) {
WriteOutput ("\t.dbg\tline\n");
}
/* Free register info */
CS_FreeRegInfo (S);
}
void CS_FreeRegInfo (CodeSeg* S)
/* Free register infos for all instructions */
{
unsigned I;
for (I = 0; I < CS_GetEntryCount (S); ++I) {
CE_FreeRegInfo (CS_GetEntry(S, I));
}
}
void CS_GenRegInfo (CodeSeg* S)
/* Generate register infos for all instructions */
{
unsigned I;
RegContents Regs; /* Initial register contents */
RegContents* CurrentRegs; /* Current register contents */
int WasJump; /* True if last insn was a jump */
int Done; /* All runs done flag */
/* Be sure to delete all register infos */
CS_FreeRegInfo (S);
/* We may need two runs to get back references right */
do {
/* Assume we're done after this run */
Done = 1;
/* On entry, the register contents are unknown */
RC_Invalidate (&Regs);
RC_InvalidatePS (&Regs);
CurrentRegs = &Regs;
/* Walk over all insns and note just the changes from one insn to the
** next one.
*/
WasJump = 0;
for (I = 0; I < CS_GetEntryCount (S); ++I) {
CodeEntry* P;
/* Get the next instruction */
CodeEntry* E = CollAtUnchecked (&S->Entries, I);
/* If the instruction has a label, we need some special handling */
unsigned LabelCount = CE_GetLabelCount (E);
if (LabelCount > 0) {
/* Loop over all entry points that jump here. If these entry
** points already have register info, check if all values are
** known and identical. If all values are identical, and the
** preceeding instruction was not an unconditional branch, check
** if the register value on exit of the preceeding instruction
** is also identical. If all these values are identical, the
** value of a register is known, otherwise it is unknown.
*/
CodeLabel* Label = CE_GetLabel (E, 0);
unsigned Entry;
if (WasJump) {
/* Preceeding insn was an unconditional branch */
CodeEntry* J = CL_GetRef(Label, 0);
if (J->RI) {
Regs = J->RI->Out2;
} else {
RC_Invalidate (&Regs);
RC_InvalidatePS (&Regs);
}
Entry = 1;
} else {
Regs = *CurrentRegs;
Entry = 0;
}
while (Entry < CL_GetRefCount (Label)) {
/* Get this entry */
CodeEntry* J = CL_GetRef (Label, Entry);
if (J->RI == 0) {
/* No register info for this entry. This means that the
** instruction that jumps here is at higher addresses and
** the jump is a backward jump. We need a second run to
** get the register info right in this case. Until then,
** assume unknown register contents.
*/
Done = 0;
RC_Invalidate (&Regs);
RC_InvalidatePS (&Regs);
break;
}
if (J->RI->Out2.RegA != Regs.RegA) {
Regs.RegA = UNKNOWN_REGVAL;
}
if (J->RI->Out2.RegX != Regs.RegX) {
Regs.RegX = UNKNOWN_REGVAL;
}
if (J->RI->Out2.RegY != Regs.RegY) {
Regs.RegY = UNKNOWN_REGVAL;
}
if (J->RI->Out2.SRegLo != Regs.SRegLo) {
Regs.SRegLo = UNKNOWN_REGVAL;
}
if (J->RI->Out2.SRegHi != Regs.SRegHi) {
Regs.SRegHi = UNKNOWN_REGVAL;
}
if (J->RI->Out2.Tmp1 != Regs.Tmp1) {
Regs.Tmp1 = UNKNOWN_REGVAL;
}
unsigned PF = J->RI->Out2.PFlags ^ Regs.PFlags;
Regs.PFlags |= ((PF >> 8) | PF | (PF << 8)) & UNKNOWN_PFVAL_ALL;
Regs.ZNRegs &= J->RI->Out2.ZNRegs;
++Entry;
}
/* Use this register info */
CurrentRegs = &Regs;
}
/* Generate register info for this instruction */
CE_GenRegInfo (E, CurrentRegs);
/* Remember for the next insn if this insn was an uncondition branch */
WasJump = (E->Info & OF_UBRA) != 0;
/* Output registers for this insn are input for the next */
CurrentRegs = &E->RI->Out;
/* If this insn is a branch on zero flag, we may have more info on
** register contents for one of both flow directions, but only if
** we've gone through a previous instruction.
*/
if (LabelCount == 0 && (E->Info & OF_ZBRA) != 0 && (P = CS_GetPrevEntry (S, I)) != 0) {
/* Get the branch condition */
bc_t BC = GetBranchCond (E->OPC);
/* Check the previous instruction */
switch (P->OPC) {
case OP65_ADC:
case OP65_AND:
case OP65_DEA:
case OP65_EOR:
case OP65_INA:
case OP65_LDA:
case OP65_ORA:
case OP65_PLA:
case OP65_SBC:
/* A is zero in one execution flow direction */
if (BC == BC_EQ) {
E->RI->Out2.RegA = 0;
} else {
E->RI->Out.RegA = 0;
}
break;
case OP65_CMP:
/* If this is an immidiate compare, the A register has
** the value of the compare later.
*/
if (CE_IsConstImm (P)) {
if (BC == BC_EQ) {
E->RI->Out2.RegA = (unsigned char)P->Num;
} else {
E->RI->Out.RegA = (unsigned char)P->Num;
}
}
break;
case OP65_CPX:
/* If this is an immidiate compare, the X register has
** the value of the compare later.
*/
if (CE_IsConstImm (P)) {
if (BC == BC_EQ) {
E->RI->Out2.RegX = (unsigned char)P->Num;
} else {
E->RI->Out.RegX = (unsigned char)P->Num;
}
}
break;
case OP65_CPY:
/* If this is an immidiate compare, the Y register has
** the value of the compare later.
*/
if (CE_IsConstImm (P)) {
if (BC == BC_EQ) {
E->RI->Out2.RegY = (unsigned char)P->Num;
} else {
E->RI->Out.RegY = (unsigned char)P->Num;
}
}
break;
case OP65_DEX:
case OP65_INX:
case OP65_LDX:
case OP65_PLX:
/* X is zero in one execution flow direction */
if (BC == BC_EQ) {
E->RI->Out2.RegX = 0;
} else {
E->RI->Out.RegX = 0;
}
break;
case OP65_DEY:
case OP65_INY:
case OP65_LDY:
case OP65_PLY:
/* X is zero in one execution flow direction */
if (BC == BC_EQ) {
E->RI->Out2.RegY = 0;
} else {
E->RI->Out.RegY = 0;
}
break;
case OP65_TAX:
case OP65_TXA:
/* If the branch is a beq, both A and X are zero at the
** branch target, otherwise they are zero at the next
** insn.
*/
if (BC == BC_EQ) {
E->RI->Out2.RegA = E->RI->Out2.RegX = 0;
} else {
E->RI->Out.RegA = E->RI->Out.RegX = 0;
}
break;
case OP65_TAY:
case OP65_TYA:
/* If the branch is a beq, both A and Y are zero at the
** branch target, otherwise they are zero at the next
** insn.
*/
if (BC == BC_EQ) {
E->RI->Out2.RegA = E->RI->Out2.RegY = 0;
} else {
E->RI->Out.RegA = E->RI->Out.RegY = 0;
}
break;
default:
break;
}
}
}
} while (!Done);
}