From 688342e194367cee9864b5772825063c9decd236 Mon Sep 17 00:00:00 2001 From: acqn Date: Thu, 16 Apr 2020 17:17:53 +0800 Subject: [PATCH] Now every code entry has its argument parsed to tell some info. It fixes the compiling performance regression as well. Built-in ZPs are recognized. --- src/cc65/codeent.c | 330 +++++++++++++++++++++++++++++++---------- src/cc65/codeent.h | 45 +++++- src/cc65/codelab.c | 8 +- src/cc65/codeoptutil.c | 45 +++--- 4 files changed, 319 insertions(+), 109 deletions(-) diff --git a/src/cc65/codeent.c b/src/cc65/codeent.c index aa3a960df..2ffe4e685 100644 --- a/src/cc65/codeent.c +++ b/src/cc65/codeent.c @@ -33,6 +33,8 @@ +#include +#include #include /* common */ @@ -43,11 +45,13 @@ #include "xsprintf.h" /* cc65 */ +#include "asmlabel.h" #include "codeent.h" #include "codeinfo.h" +#include "codelab.h" #include "error.h" #include "global.h" -#include "codelab.h" +#include "ident.h" #include "opcodes.h" #include "output.h" #include "reginfo.h" @@ -95,42 +99,11 @@ static char* GetArgCopy (const char* Arg) -static int NumArg (const char* Arg, unsigned long* Num) -/* If the given argument is numerical, convert it and return true. Otherwise -** set Num to zero and return false. -*/ +static void FreeParsedArg (char* ArgBase) +/* Free a code entry parsed argument */ { - char* End; - unsigned long Val; - - /* Determine the base */ - int Base = 10; - if (*Arg == '$') { - ++Arg; - Base = 16; - } else if (*Arg == '%') { - ++Arg; - Base = 2; - } - - /* Convert the value. strtol is not exactly what we want here, but it's - ** cheap and may be replaced by something fancier later. 
- */ - Val = strtoul (Arg, &End, Base); - - /* Check if the conversion was successful */ - if (*End != '\0') { - - /* Could not convert */ - *Num = 0; - return 0; - - } else { - - /* Conversion ok */ - *Num = Val; - return 1; - + if (ArgBase != 0 && ArgBase != EmptyArg) { + xfree (ArgBase); } } @@ -356,7 +329,7 @@ static void SetUseChgInfo (CodeEntry* E, const OPCDesc* D) -int ParseOpcArgStr (const char* Arg, struct StrBuf* Name, int* Offset) +int ParseOpcArgStr (const char* Arg, unsigned short* ArgInfo, struct StrBuf* Name, long* Offset) /* Break the opcode argument string into a symbol name/label part plus an offset. ** Both parts are optional, but if there are any characters in the string that ** can't be parsed, it's an failure. @@ -364,10 +337,18 @@ int ParseOpcArgStr (const char* Arg, struct StrBuf* Name, int* Offset) ** Return whether parsing succeeds or not. */ { - int NewOff = 0; - const char* OffsetPart = 0; - const char* NameEnd = 0; - int Negative = 0; + unsigned short Flags = 0; + const char* OffsetPart = 0; + const char* NameEnd = 0; + int Negative = 0; + unsigned long NumVal = 0; + long long AccOffset = 0; + char* End; /* Used for checking errors */ + + if (ArgInfo != 0) { + *ArgInfo = 0; + } + *Offset = 0; /* A numeric address is treated as an unnamed address with the numeric part as the offset */ if (IsDigit (Arg[0]) || Arg[0] == '$') { @@ -381,8 +362,9 @@ int ParseOpcArgStr (const char* Arg, struct StrBuf* Name, int* Offset) ** symbol. */ if (Arg[0] == '_') { - /* Skip the underscore */ - ++Arg; + Flags |= AIF_EXTERNAL; + } else { + Flags |= AIF_BUILTIN; } /* Rip off the offset if present. 
*/ @@ -390,6 +372,7 @@ int ParseOpcArgStr (const char* Arg, struct StrBuf* Name, int* Offset) if (OffsetPart == 0) { OffsetPart = strchr (Arg, '-'); } + if (OffsetPart != 0) { /* Get the real arg name */ NameEnd = strchr (Arg, ' '); @@ -401,21 +384,43 @@ int ParseOpcArgStr (const char* Arg, struct StrBuf* Name, int* Offset) } else { /* No offset */ - *Offset = 0; - SB_CopyStr (Name, Arg); SB_Terminate (Name); + } - return 1; + if ((Flags & AIF_EXTERNAL) == 0) { + if (SB_GetLen (Name) > 0) { + Flags |= AIF_HAS_NAME; + + /* See if the name is a local label */ + if (IsLocalLabelName (SB_GetConstBuf (Name))) { + Flags |= AIF_LOCAL; + } + } + + } else { + if (SB_GetLen (Name) <= 0) { + /* Invalid external name */ + Flags &= ~AIF_EXTERNAL; + *Offset = 0; + if (ArgInfo != 0) { + *ArgInfo = Flags | AIF_FAILURE; + } + return 0; + } + Flags |= AIF_HAS_NAME; } } - *Offset = 0; - /* Get the offset */ while (OffsetPart != 0 && OffsetPart[0] != '\0') { + /* Skip spaces */ + while (OffsetPart[0] == ' ') { + ++OffsetPart; + } + + Negative = 0; if (OffsetPart[0] == '+') { - Negative = 0; ++OffsetPart; } else if (OffsetPart[0] == '-') { Negative = 1; @@ -427,22 +432,50 @@ int ParseOpcArgStr (const char* Arg, struct StrBuf* Name, int* Offset) ++OffsetPart; } + /* Determine the base and convert the value. strtol/strtoul is not + ** exactly what we want here, but it's cheap and may be replaced by + ** something fancier later. 
+ */ if (OffsetPart[0] == '$') { - if (sscanf (OffsetPart + 1, "%X", &NewOff) != 1) { - return 0; - } + /* Base 16 hexadecimal */ + NumVal = strtoul (OffsetPart+1, &End, 16); + } else if (OffsetPart[0] != '%') { + /* Base 10 decimal */ + NumVal = strtoul (OffsetPart, &End, 10); } else { - if (sscanf (OffsetPart, "%u", &NewOff) != 1) { - return 0; - } + /* Base 2 binary */ + NumVal = strtoul (OffsetPart+1, &End, 2); } + /* Check if the conversion was successful */ + if (*End != '\0' && *End != ' ' && *End != '+' && *End != '-') { + /* Could not convert */ + *Offset = 0; + if (ArgInfo != 0) { + *ArgInfo = Flags | AIF_FAILURE; + } + return 0; + } + + /* Check for out of range result. NOTE(review): errno is not cleared before the strtoul calls shown above, so a stale ERANGE could be picked up here - confirm */ + if (NumVal == ULONG_MAX && errno == ERANGE) { + /* Value does not fit in an unsigned long */ + *Offset = 0; + if (ArgInfo != 0) { + *ArgInfo = Flags | AIF_FAILURE; + } + return 0; + } + + /* This argument does have an offset */ + Flags |= AIF_HAS_OFFSET; + if (Negative) { - NewOff = -NewOff; + AccOffset -= (long long)NumVal; + } else { + AccOffset += (long long)NumVal; } - *Offset += NewOff; - /* See if there are more */ Arg = OffsetPart; OffsetPart = strchr (Arg, '+'); @@ -451,6 +484,19 @@ int ParseOpcArgStr (const char* Arg, unsigned short* ArgInfo, struct StrBuf* Name, long* Offset) } } + if (AccOffset > LONG_MAX || AccOffset < LONG_MIN) { + /* Accumulated offset does not fit in a long */ + *Offset = 0; + if (ArgInfo != 0) { + *ArgInfo = Flags | AIF_FAILURE; + } + return 0; + } + *Offset = (long)AccOffset; + if (ArgInfo != 0) { + *ArgInfo = Flags & ~AIF_FAILURE; + } + return 1; } @@ -471,6 +517,35 @@ const char* MakeHexArg (unsigned Num) +void PreparseArg (CodeEntry* E) +/* Parse the argument string and memorize the result for the code entry */ +{ + StrBuf B; + SB_InitFromString (&B, xmalloc (strlen (E->Arg) + 1)); + + /* Parse the argument string */ + if (ParseOpcArgStr (E->Arg, &E->ArgInfo, &B, &E->ArgOff)) { + E->ArgBase = SB_GetBuf (&B); + + if ((E->ArgInfo & (AIF_HAS_NAME | AIF_HAS_OFFSET)) == AIF_HAS_OFFSET) { + E->Flags |= CEF_NUMARG; + + /* 
Use the new numerical value */ + E->Num = E->ArgOff; + } + + } else { + /* Parsing fails. Issue an error/warning so that this could be spotted and fixed. */ + E->ArgBase = EmptyArg; + SB_Done (&B); + if (Debug) { + Warning ("Parsing argument \"%s\" failed!", E->Arg); + } + } +} + + + CodeEntry* NewCodeEntry (opc_t OPC, am_t AM, const char* Arg, CodeLabel* JumpTo, LineInfo* LI) /* Create a new code entry, initialize and return it */ @@ -482,15 +557,24 @@ CodeEntry* NewCodeEntry (opc_t OPC, am_t AM, const char* Arg, CodeEntry* E = xmalloc (sizeof (CodeEntry)); /* Initialize the fields */ - E->OPC = D->OPC; - E->AM = AM; - E->Size = GetInsnSize (E->OPC, E->AM); - E->Arg = GetArgCopy (Arg); - E->Flags = NumArg (E->Arg, &E->Num)? CEF_NUMARG : 0; /* Needs E->Arg */ - E->Info = D->Info; - E->JumpTo = JumpTo; - E->LI = UseLineInfo (LI); - E->RI = 0; + E->OPC = D->OPC; + E->AM = AM; + E->Size = GetInsnSize (E->OPC, E->AM); + E->Arg = GetArgCopy (Arg); + E->Flags = 0; + E->Info = D->Info; + E->ArgInfo = 0; + E->JumpTo = JumpTo; + E->LI = UseLineInfo (LI); + E->RI = 0; + + /* Parse the argument string if it's given */ + if (Arg == 0 || Arg[0] == '\0') { + E->ArgBase = EmptyArg; + } else { + PreparseArg (E); + } + SetUseChgInfo (E, D); InitCollection (&E->Labels); @@ -508,6 +592,9 @@ CodeEntry* NewCodeEntry (opc_t OPC, am_t AM, const char* Arg, void FreeCodeEntry (CodeEntry* E) /* Free the given code entry */ { + /* Free the argument base string if we have one */ + FreeParsedArg (E->ArgBase); + /* Free the string argument if we have one */ FreeArg (E->Arg); @@ -572,9 +659,8 @@ void CE_ClearJumpTo (CodeEntry* E) /* Clear the JumpTo entry */ E->JumpTo = 0; - /* Clear the argument and assign the empty one */ - FreeArg (E->Arg); - E->Arg = EmptyArg; + /* Clear the argument */ + CE_SetArg (E, 0); } @@ -593,17 +679,84 @@ void CE_MoveLabel (CodeLabel* L, CodeEntry* E) void CE_SetArg (CodeEntry* E, const char* Arg) -/* Replace the argument by the new one. 
*/ +/* Replace the whole argument by the new one. */ { + /* Free the old parsed argument base */ + FreeParsedArg (E->ArgBase); + /* Free the old argument */ FreeArg (E->Arg); /* Assign the new one */ E->Arg = GetArgCopy (Arg); + /* Parse the new argument string */ + PreparseArg (E); + /* Update the Use and Chg in E */ - const OPCDesc* D = GetOPCDesc (E->OPC); - SetUseChgInfo (E, D); + SetUseChgInfo (E, GetOPCDesc (E->OPC)); +} + + + +void CE_SetArgBaseAndOff (CodeEntry* E, const char* ArgBase, long ArgOff) +/* Replace the argument base and offset by the new ones. With a non-blank ArgBase, +** ArgOff is appended only if CE_HasArgOffset (E) is true; with a blank ArgBase, ArgOff is set via CE_SetNumArg only if E has AIF_HAS_OFFSET set, else the argument is emptied. +*/ +{ + if (ArgBase != 0 && ArgBase[0] != '\0') { + + /* The argument base is not blank. NOTE(review): only 16 bytes are reserved beyond the base for the %+ld text and the terminator; a 64-bit long may need 21 - confirm */ + char Buf[IDENTSIZE + 16]; + char* Str = Buf; + size_t Len = strlen (ArgBase) + 16; + if (Len >= sizeof (Buf)) { + Str = xmalloc (Len); + } + + if (CE_HasArgOffset (E)) { + sprintf (Str, "%s%+ld", ArgBase, ArgOff); + } else { + sprintf (Str, "%s", ArgBase); + } + CE_SetArg (E, Str); + + if (Str != Buf) { + xfree (Str); + } + + } else { + /* The argument has no base */ + if ((E->ArgInfo & AIF_HAS_OFFSET) != 0) { + /* This is a numeric argument */ + E->Flags |= CEF_NUMARG; + CE_SetNumArg (E, ArgOff); + } else { + /* Empty argument */ + CE_SetArg (E, EmptyArg); + } + } +} + + + +void CE_SetArgBase (CodeEntry* E, const char* ArgBase) +/* Replace the argument base by the new one. +** The entry must have an existing base. 
+*/ +{ + /* Check that the entry has a base name */ + CHECK (CE_HasArgBase (E)); + + CE_SetArgBaseAndOff (E, ArgBase, E->ArgOff); +} + + + +void CE_SetArgOffset (CodeEntry* E, long ArgOff) +/* Replace the argument offset by the new one */ +{ + CE_SetArgBaseAndOff (E, E->ArgBase, ArgOff); } @@ -616,24 +769,45 @@ void CE_SetNumArg (CodeEntry* E, long Num) char Buf[16]; /* Check that the entry has a numerical argument */ - CHECK (E->Flags & CEF_NUMARG); + CHECK (CE_HasNumArg (E)); /* Make the new argument string */ if (E->Size == 2) { Num &= 0xFF; xsprintf (Buf, sizeof (Buf), "$%02X", (unsigned) Num); - } else if (E->Size == 3) { + } else if (E->Size == 3 || E->Size == 5) { Num &= 0xFFFF; xsprintf (Buf, sizeof (Buf), "$%04X", (unsigned) Num); } else { Internal ("Invalid instruction size in CE_SetNumArg"); } - /* Replace the argument by the new one */ + /* Replace the whole argument by the new one */ CE_SetArg (E, Buf); +} - /* Use the new numerical value */ - E->Num = Num; + + +int CE_IsArgStrParsed (const CodeEntry* E) +/* Return true if the argument of E was successfully parsed last time */ +{ + return (E->ArgInfo & AIF_FAILURE) == 0; +} + + + +int CE_HasArgBase (const CodeEntry* E) +/* Return true if the argument of E has a non-blank base name */ +{ + return (E->ArgInfo & AIF_HAS_NAME) != 0 && E->ArgBase[0] != '\0'; +} + + + +int CE_HasArgOffset (const CodeEntry* E) +/* Return true if the argument of E has a non-zero offset */ +{ + return (E->ArgInfo & AIF_HAS_OFFSET) != 0 && E->ArgOff != 0; } diff --git a/src/cc65/codeent.h b/src/cc65/codeent.h index 57a7677bb..173118a7f 100644 --- a/src/cc65/codeent.h +++ b/src/cc65/codeent.h @@ -73,14 +73,32 @@ struct CodeEntry { char* Arg; /* Argument as string */ unsigned long Num; /* Numeric argument */ unsigned short Info; /* Additional code info */ + unsigned short ArgInfo; /* Additional argument info */ unsigned int Use; /* Registers used */ unsigned int Chg; /* Registers changed/destroyed */ CodeLabel* JumpTo; /* Jump 
label */ Collection Labels; /* Labels for this instruction */ LineInfo* LI; /* Source line info for this insn */ RegInfo* RI; /* Register info for this insn */ + char* ArgBase; /* Base name of the parsed argument */ + long ArgOff; /* Numeric offset of the parsed argument */ }; +/* Argument info flags, kept in CodeEntry.ArgInfo */ +#define AIF_HAS_NAME 0x0001U /* Argument has a name part */ +#define AIF_HAS_OFFSET 0x0002U /* Argument has a numeric part */ +#define AIF_BUILTIN 0x0004U /* The name is built-in */ +#define AIF_EXTERNAL 0x0008U /* The name is external */ +#define AIF_LOCAL 0x0010U /* The name is a local label */ +#define AIF_ZP_NAME 0x0020U /* The name is a zp location */ +#define AIF_LOBYTE 0x0100U +#define AIF_HIBYTE 0x0200U +#define AIF_BANKBYTE 0x0400U +#define AIF_FAILURE 0x8000U /* Argument was not parsed successfully */ + +#define AIF_WORD (AIF_LOBYTE | AIF_HIBYTE) +#define AIF_FAR (AIF_LOBYTE | AIF_HIBYTE | AIF_BANKBYTE) + /*****************************************************************************/ @@ -89,7 +107,7 @@ struct CodeEntry { -int ParseOpcArgStr (const char* Arg, struct StrBuf* Name, int* Offset); +int ParseOpcArgStr (const char* Arg, unsigned short* ArgInfo, struct StrBuf* Name, long* Offset); /* Break the opcode argument string into a symbol name/label part plus an offset. ** Both parts are optional, but if there are any characters in the string that ** can't be parsed, it's an failure. @@ -105,6 +123,9 @@ const char* MakeHexArg (unsigned Num); ** safe). */ +void PreparseArg (CodeEntry* E); +/* Parse the argument string and memorize the result for the code entry */ + CodeEntry* NewCodeEntry (opc_t OPC, am_t AM, const char* Arg, CodeLabel* JumpTo, LineInfo* LI); /* Create a new code entry, initialize and return it */ @@ -205,11 +226,33 @@ INLINE int CE_HasNumArg (const CodeEntry* E) void CE_SetArg (CodeEntry* E, const char* Arg); /* Replace the argument by the new one. 
*/ +void CE_SetArgBaseAndOff (CodeEntry* E, const char* ArgBase, long ArgOff); +/* Replace the argument base and offset by the new ones. With a non-blank ArgBase, +** ArgOff is appended only if CE_HasArgOffset (E) is true; with a blank ArgBase, ArgOff is set via CE_SetNumArg only if E has AIF_HAS_OFFSET set, else the argument is emptied. +*/ + +void CE_SetArgBase (CodeEntry* E, const char* ArgBase); +/* Replace the argument base by the new one. +** The entry must have an existing base. +*/ + +void CE_SetArgOffset (CodeEntry* E, long ArgOff); +/* Replace the argument offset by the new one, subject to the rules of CE_SetArgBaseAndOff */ + void CE_SetNumArg (CodeEntry* E, long Num); /* Set a new numeric argument for the given code entry that must already ** have a numeric argument. */ +int CE_IsArgStrParsed (const CodeEntry* E); +/* Return true if the argument of E was successfully parsed last time */ + +int CE_HasArgBase (const CodeEntry* E); +/* Return true if the argument of E has a non-blank base name */ + +int CE_HasArgOffset (const CodeEntry* E); +/* Return true if the argument of E has a non-zero offset */ + int CE_IsConstImm (const CodeEntry* E); /* Return true if the argument of E is a constant immediate value */ diff --git a/src/cc65/codelab.c b/src/cc65/codelab.c index ff26645dc..0909702fd 100644 --- a/src/cc65/codelab.c +++ b/src/cc65/codelab.c @@ -90,8 +90,12 @@ void CL_AddRef (CodeLabel* L, struct CodeEntry* E) /* The insn at E jumps to this label */ E->JumpTo = L; - /* Replace the code entry argument with the name of the new label */ - CE_SetArg (E, L->Name); + if (CE_HasArgBase (E)) { + /* Replace the code entry argument base with the name of the new label */ + CE_SetArgBase (E, L->Name); + } else { + CE_SetArgBaseAndOff (E, L->Name, 0); + } /* Remember that in the label */ CollAppend (&L->JumpFrom, E); diff --git a/src/cc65/codeoptutil.c b/src/cc65/codeoptutil.c index 503f33859..3df762eeb 100644 --- a/src/cc65/codeoptutil.c +++ b/src/cc65/codeoptutil.c @@ -231,8 +231,6 @@ static int Affected (LoadRegInfo* LRI, const CodeEntry* E) unsigned int Chg; unsigned int UseToCheck = 0; unsigned int 
ChgToCheck = 0; - StrBuf Src, YSrc, New; - int SrcOff = 0, YSrcOff = 0, NewOff = 0; const ZPInfo* ZI = 0; unsigned Res = 0; CodeEntry* AE = 0; @@ -243,10 +241,6 @@ static int Affected (LoadRegInfo* LRI, const CodeEntry* E) return 0; } - SB_Init (&Src); - SB_Init (&YSrc); - SB_Init (&New); - if (E->AM == AM65_ACC || E->AM == AM65_BRA || E->AM == AM65_IMM || E->AM == AM65_IMP) { goto L_Result; } @@ -262,14 +256,13 @@ static int Affected (LoadRegInfo* LRI, const CodeEntry* E) UseToCheck |= AE->Use & ~REG_A & REG_ALL; ChgToCheck |= AE->Chg & ~REG_A & REG_ALL; - SB_InitFromString (&Src, xstrdup (AE->Arg)); - if (!ParseOpcArgStr (AE->Arg, &Src, &SrcOff)) { + /* Check if the argument has been parsed successfully */ + if (!CE_IsArgStrParsed (AE)) { /* Bail out and play it safe*/ - Res |= LI_SRC_USE | LI_SRC_CHG; - goto L_Result; + goto L_Affected; } /* We have to manually set up the use/chg flags for builtin functions */ - ZI = GetZPInfo (SB_GetConstBuf (&Src)); + ZI = GetZPInfo (AE->ArgBase); if (ZI != 0) { UseToCheck |= ZI->ByteUse; ChgToCheck |= ZI->ByteUse; @@ -287,14 +280,14 @@ static int Affected (LoadRegInfo* LRI, const CodeEntry* E) YE = LRI->LoadYEntry; if (YE != 0) { UseToCheck |= YE->Use; - SB_InitFromString (&YSrc, xstrdup (YE->Arg)); - if (!ParseOpcArgStr (YE->Arg, &YSrc, &YSrcOff)) { + + /* Check if the argument has been parsed successfully */ + if (!CE_IsArgStrParsed (YE)) { /* Bail out and play it safe*/ - Res |= LI_SRC_USE | LI_SRC_CHG; - goto L_Result; + goto L_Affected; } /* We have to manually set up the use/chg flags for builtin functions */ - ZI = GetZPInfo (SB_GetConstBuf (&YSrc)); + ZI = GetZPInfo (YE->ArgBase); if (ZI != 0) { UseToCheck |= ZI->ByteUse; ChgToCheck |= ZI->ByteUse; @@ -322,14 +315,13 @@ static int Affected (LoadRegInfo* LRI, const CodeEntry* E) goto L_Result; } /* Otherwise play it safe */ - Res |= LI_SRC_USE | LI_SRC_CHG; - goto L_Result; + goto L_Affected; } else { if ((E->Info & (OF_READ | OF_WRITE)) != 0) { - SB_InitFromString 
(&New, xstrdup (E->Arg)); - if (!ParseOpcArgStr (E->Arg, &New, &NewOff)) { + /* Check if the argument has been parsed successfully */ + if (!CE_IsArgStrParsed (E)) { /* Bail out and play it safe*/ goto L_Affected; } @@ -341,16 +333,16 @@ static int Affected (LoadRegInfo* LRI, const CodeEntry* E) */ if (E->AM == AM65_ABS || E->AM == AM65_ZP || - (E->AM == AM65_ZP_INDY && SB_CompareStr (&New, "sp") == 0) + (E->AM == AM65_ZP_INDY && strcmp (E->ArgBase, "sp") == 0) ) { if ((LRI->Flags & LI_CHECK_ARG) != 0) { if (AE == 0 || (AE->AM != AM65_ABS && AE->AM != AM65_ZP && (AE->AM != AM65_ZP_INDY || - SB_CompareStr (&Src, "sp") != 0)) || - (SrcOff == NewOff && - SB_Compare (&Src, &New) == 0)) { + strcmp (AE->ArgBase, "sp") != 0)) || + (AE->ArgOff == E->ArgOff && + strcmp (AE->ArgBase, E->ArgBase) == 0)) { if ((E->Info & OF_READ) != 0) { /* Used */ @@ -367,7 +359,7 @@ static int Affected (LoadRegInfo* LRI, const CodeEntry* E) /* If we don't know what memory location could have been used by Y, ** we just assume all. */ if (YE == 0 || - (YSrcOff == NewOff && SB_Compare (&YSrc, &New) == 0)) { + (YE->ArgOff == E->ArgOff && strcmp (YE->ArgBase, E->ArgBase) == 0)) { if ((E->Info & OF_READ) != 0) { /* Used */ @@ -415,9 +407,6 @@ L_Result: (E->Chg & REG_Y) != 0) { Res |= LI_Y_CHG; } - SB_Done (&Src); - SB_Done (&YSrc); - SB_Done (&New); return Res; }