1
0
mirror of https://github.com/fadden/6502bench.git synced 2024-12-01 22:50:35 +00:00

Improve label validation for platform symbol files

We were using a very simple regex pattern for the label part, and
not performing additional validation checks later.  This allowed
a symbol that started with a number (e.g. "4ALL") to get much farther
than it should have.

This change modifies the regex pattern to match only valid label
syntax: a letter or underscore followed by one or more letters,
digits, or underscores.
This commit is contained in:
Andy McFadden 2021-07-03 12:32:02 -07:00
parent 210723221d
commit 1472609d15

View File

@ -44,7 +44,7 @@ namespace SourceGen {
/// NAME {@,=,<,>} VALUE [& MASK] [WIDTH] [;COMMENT] /// NAME {@,=,<,>} VALUE [& MASK] [WIDTH] [;COMMENT]
/// ///
/// Regex output groups are: /// Regex output groups are:
/// 1. NAME /// 1. NAME (2+ alphanumeric or underscore, cannot start with number)
/// 2. type/direction char /// 2. type/direction char
/// 3. VALUE (can be any non-whitespace) /// 3. VALUE (can be any non-whitespace)
/// 4. optional: WIDTH (can be any non-whitespace) /// 4. optional: WIDTH (can be any non-whitespace)
@ -54,7 +54,7 @@ namespace SourceGen {
/// If you want to make sense of this, I highly recommend https://regex101.com/ . /// If you want to make sense of this, I highly recommend https://regex101.com/ .
/// </remarks> /// </remarks>
private const string SYMBOL_PATTERN = private const string SYMBOL_PATTERN =
@"^([A-Za-z0-9_]+)\s*([@=<>])\s*([^\s;]+)\s*([^\s;]+)?\s*(;.*)?$"; @"^([A-Za-z_][A-Za-z0-9_]+)\s*([@=<>])\s*([^\s;]+)\s*([^\s;]+)?\s*(;.*)?$";
private static Regex sNameValueRegex = new Regex(SYMBOL_PATTERN); private static Regex sNameValueRegex = new Regex(SYMBOL_PATTERN);
private const int GROUP_NAME = 1; private const int GROUP_NAME = 1;
private const int GROUP_TYPE = 2; private const int GROUP_TYPE = 2;
@ -63,7 +63,8 @@ namespace SourceGen {
private const int GROUP_COMMENT = 5; private const int GROUP_COMMENT = 5;
/// <summary> /// <summary>
/// Regex pattern for mask definition in platform symbol file. /// Regex pattern for mask definition in platform symbol file. This mostly just
/// performs tokenization. Syntax and validity checking is done later.
/// ///
/// Looks like: /// Looks like:
/// CMP_MASK CMP_VALUE ADDR_MASK [;COMMENT] /// CMP_MASK CMP_VALUE ADDR_MASK [;COMMENT]
@ -158,6 +159,8 @@ namespace SourceGen {
} else { } else {
MatchCollection matches = sNameValueRegex.Matches(line); MatchCollection matches = sNameValueRegex.Matches(line);
if (matches.Count == 1) { if (matches.Count == 1) {
// Our label regex is the same as Asm65.Label's definition; no need
// for further validation on the label.
string label = matches[0].Groups[GROUP_NAME].Value; string label = matches[0].Groups[GROUP_NAME].Value;
char typeAndDir = matches[0].Groups[GROUP_TYPE].Value[0]; char typeAndDir = matches[0].Groups[GROUP_TYPE].Value[0];
bool isConst = (typeAndDir == '='); bool isConst = (typeAndDir == '=');