#393, Bug 1135377 - Part 8: Disallow extended pattern in RegExp with unicode flag. r=till, f=anba

This commit is contained in:
Tooru Fujisawa 2015-08-07 08:12:51 +09:00 committed by Cameron Kaiser
parent f31a9f9e84
commit c05db4075d
4 changed files with 207 additions and 13 deletions

View File

@ -405,6 +405,31 @@ NegativeLookahead(LifoAlloc* alloc, char16_t from, char16_t to)
return alloc->newInfallible<RegExpLookahead>(RangeAtom(alloc, from, to), false, 0, 0);
}
// Returns true when |c| is one of the regular expression syntax characters
//   ^ $ \ . * + ? ( ) [ ] { } |
// or the regexp delimiter '/'. Any other value, including 0, yields false.
static bool
IsSyntaxCharacter(widechar c)
{
    // NUL-terminated table of every accepted character; the terminator is
    // never compared against |c|, so 0 is not treated as a match.
    static const char kSyntaxCharacters[] = "^$\\.*+?()[]{}|/";

    for (const char* entry = kSyntaxCharacters; *entry != '\0'; ++entry) {
        if (c == static_cast<widechar>(*entry))
            return true;
    }
    return false;
}
#ifdef DEBUG
// Currently only used in an assertion.
static bool
@ -459,16 +484,23 @@ RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code)
widechar controlLetter = Next();
widechar letter = controlLetter & ~('A' ^ 'a');
// For compatibility with JSC, inside a character class
// we also accept digits and underscore as control characters.
if ((controlLetter >= '0' && controlLetter <= '9') ||
controlLetter == '_' ||
(letter >= 'A' && letter <= 'Z')) {
// we also accept digits and underscore as control characters,
// but only in non-unicode mode
if ((!unicode_ &&
((controlLetter >= '0' && controlLetter <= '9') ||
controlLetter == '_')) ||
(letter >= 'A' && letter <= 'Z'))
{
Advance(2);
// Control letters mapped to ASCII control characters in the range
// 0x00-0x1f.
*code = controlLetter & 0x1f;
return true;
}
if (unicode_) {
ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
return false;
}
// We match JSC in reading the backslash as a literal
// character instead of as starting an escape.
*code = '\\';
@ -476,9 +508,18 @@ RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code)
}
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7':
// For compatibility, we interpret a decimal escape that isn't
// a back reference (and therefore either \0 or not valid according
// to the specification) as a 1..3 digit octal character code.
if (unicode_) {
if (current() == '0') {
*code = 0;
return true;
}
ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
return false;
}
// For compatibility, outside of unicode mode, we interpret a decimal
// escape that isn't a back reference (and therefore either \0 or not
// valid according to the specification) as a 1..3 digit octal
// character code.
*code = ParseOctalLiteral();
return true;
case 'x': {
@ -488,8 +529,12 @@ RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code)
*code = value;
return true;
}
if (unicode_) {
ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
return false;
}
// If \x is not followed by a two-digit hexadecimal, treat it
// as an identity escape.
// as an identity escape in non-unicode mode.
*code = 'x';
return true;
}
@ -527,10 +572,14 @@ RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code)
return true;
}
default: {
// Extended identity escape. We accept any character that hasn't
// been matched by a more specific case, not just the subset required
// by the ECMAScript specification.
// Extended identity escape (non-unicode only). We accept any character
// that hasn't been matched by a more specific case, not just the subset
// required by the ECMAScript specification.
widechar result = current();
if (unicode_ && result != '-' && !IsSyntaxCharacter(result)) {
ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
return false;
}
Advance();
*code = result;
return true;
@ -1388,6 +1437,8 @@ RegExpParser<CharT>::ParseDisjunction()
capture_index);
}
builder->AddAtom(body);
if (unicode_ && (group_type == POSITIVE_LOOKAHEAD || group_type == NEGATIVE_LOOKAHEAD))
continue;
// For compatibility with JSC and ES3, we allow quantifiers after
// lookaheads, and break in all cases.
break;
@ -1527,6 +1578,8 @@ RegExpParser<CharT>::ParseDisjunction()
builder->AddAtom(atom);
break;
}
if (unicode_)
return ReportError(JSMSG_BACK_REF_OUT_OF_RANGE);
widechar first_digit = Next();
if (first_digit == '8' || first_digit == '9') {
// Treat as identity escape
@ -1537,6 +1590,14 @@ RegExpParser<CharT>::ParseDisjunction()
}
// FALLTHROUGH
case '0': {
if (unicode_) {
Advance(2);
if (IsDecimalDigit(current()))
return ReportError(JSMSG_INVALID_DECIMAL_ESCAPE);
builder->AddCharacter(0);
break;
}
Advance();
size_t octal = ParseOctalLiteral();
builder->AddCharacter(octal);
@ -1571,6 +1632,8 @@ RegExpParser<CharT>::ParseDisjunction()
// Convert lower case letters to uppercase.
widechar letter = controlLetter & ~('a' ^ 'A');
if (letter < 'A' || 'Z' < letter) {
if (unicode_)
return ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
// controlLetter is not in range 'A'-'Z' or 'a'-'z'.
// This is outside the specification. We match JSC in
// reading the backslash as a literal character instead
@ -1588,6 +1651,8 @@ RegExpParser<CharT>::ParseDisjunction()
if (ParseHexEscape(2, &value)) {
builder->AddCharacter(value);
} else {
if (unicode_)
return ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
builder->AddCharacter('x');
}
break;
@ -1639,12 +1704,16 @@ RegExpParser<CharT>::ParseDisjunction()
}
default:
// Identity escape.
if (unicode_ && !IsSyntaxCharacter(Next()))
return ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
builder->AddCharacter(Next());
Advance(2);
break;
}
break;
case '{': {
if (unicode_)
return ReportError(JSMSG_RAW_BRACE_IN_REGEP);
int dummy;
if (ParseIntervalQuantifier(&dummy, &dummy))
return ReportError(JSMSG_NOTHING_TO_REPEAT);
@ -1661,6 +1730,10 @@ RegExpParser<CharT>::ParseDisjunction()
builder->AddAtom(LeadSurrogateAtom(alloc, c));
else if (unicode::IsTrailSurrogate(c))
builder->AddAtom(TrailSurrogateAtom(alloc, c));
else if (c == ']')
return ReportError(JSMSG_RAW_BRACKET_IN_REGEP);
else if (c == '}')
return ReportError(JSMSG_RAW_BRACE_IN_REGEP);
else
builder->AddCharacter(c);
Advance();

View File

@ -447,8 +447,10 @@ MSG_DEF(JSMSG_INVALID_TIME_ZONE, 1, JSEXN_RANGEERR, "invalid time zone in
MSG_DEF(JSMSG_UNDEFINED_CURRENCY, 0, JSEXN_TYPEERR, "undefined currency in NumberFormat() with currency style")
// RegExp
MSG_DEF(JSMSG_BACK_REF_OUT_OF_RANGE, 0, JSEXN_SYNTAXERR, "back reference out of range in regular expression")
MSG_DEF(JSMSG_BAD_CLASS_RANGE, 0, JSEXN_SYNTAXERR, "invalid range in character class")
MSG_DEF(JSMSG_ESCAPE_AT_END_OF_REGEXP, 0, JSEXN_SYNTAXERR, "\\ at end of pattern")
MSG_DEF(JSMSG_INVALID_DECIMAL_ESCAPE, 0, JSEXN_SYNTAXERR, "invalid decimal escape in regular expression")
MSG_DEF(JSMSG_INVALID_GROUP, 0, JSEXN_SYNTAXERR, "invalid regexp group")
MSG_DEF(JSMSG_INVALID_IDENTITY_ESCAPE, 0, JSEXN_SYNTAXERR, "invalid identity escape in regular expression")
MSG_DEF(JSMSG_INVALID_UNICODE_ESCAPE, 0, JSEXN_SYNTAXERR, "invalid unicode escape in regular expression")
@ -457,6 +459,8 @@ MSG_DEF(JSMSG_NEWREGEXP_FLAGGED, 0, JSEXN_TYPEERR, "can't supply flags whe
MSG_DEF(JSMSG_NOTHING_TO_REPEAT, 0, JSEXN_SYNTAXERR, "nothing to repeat")
MSG_DEF(JSMSG_NUMBERS_OUT_OF_ORDER, 0, JSEXN_SYNTAXERR, "numbers out of order in {} quantifier.")
MSG_DEF(JSMSG_RANGE_WITH_CLASS_ESCAPE, 0, JSEXN_SYNTAXERR, "character class escape cannot be used in class range in regular expression")
MSG_DEF(JSMSG_RAW_BRACE_IN_REGEP, 0, JSEXN_SYNTAXERR, "raw brace is not allowed in regular expression with unicode flag")
MSG_DEF(JSMSG_RAW_BRACKET_IN_REGEP, 0, JSEXN_SYNTAXERR, "raw bracket is not allowed in regular expression with unicode flag")
MSG_DEF(JSMSG_TOO_MANY_PARENS, 0, JSEXN_INTERNALERR, "too many parentheses in regular expression")
MSG_DEF(JSMSG_UNICODE_OVERFLOW, 0, JSEXN_SYNTAXERR, "unicode codepoint should not be greater than 0x10FFFF in regular expression")
MSG_DEF(JSMSG_UNMATCHED_RIGHT_PAREN, 0, JSEXN_SYNTAXERR, "unmatched ) in regular expression")

View File

@ -0,0 +1,117 @@
// Regression test for the RegExp unicode ("u") flag: patterns that are
// tolerated as legacy extensions without the flag must throw a SyntaxError
// when the flag is present, while the strictly valid forms keep matching.
//
// Invalid patterns are built with eval() from a template string so that the
// SyntaxError is raised at run time inside the callback instead of when this
// file itself is parsed. Inside those template strings "\\" is one backslash
// in the evaluated pattern.
var BUGNUMBER = 1135377;
var summary = "Implement RegExp unicode flag -- disallow extended patterns.";
print(BUGNUMBER + ": " + summary);
// IdentityEscape
// Outside a character class: every syntax character may be escaped with a
// backslash; escaping A, - or U is rejected.
assertEqArray(/\^\$\\\.\*\+\?\(\)\[\]\{\}\|/u.exec("^$\\.*+?()[]{}|"),
["^$\\.*+?()[]{}|"]);
assertThrowsInstanceOf(() => eval(`/\\A/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\-/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\U{10}/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\U0000/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\uD83D\\U0000/u`), SyntaxError);
// Inside a character class: same rules, except that "\-" is accepted there.
assertEqArray(/[\^\$\\\.\*\+\?\(\)\[\]\{\}\|]+/u.exec("^$\\.*+?()[]{}|"),
["^$\\.*+?()[]{}|"]);
assertThrowsInstanceOf(() => eval(`/[\\A]/u`), SyntaxError);
assertEqArray(/[A\-Z]+/u.exec("a-zABC"),
["-"]);
assertThrowsInstanceOf(() => eval(`/[\\U{10}]/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/[\\U0000]/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/[\\uD83D\\U0000]/u`), SyntaxError);
// PatternCharacter
// Unescaped braces and an unescaped closing bracket are rejected, whether or
// not the braces look like a quantifier.
assertThrowsInstanceOf(() => eval(`/{}/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/{/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/}/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/]/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/{0}/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/{1,}/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/{1,2}/u`), SyntaxError);
// QuantifiableAssertion
// Lookaheads, non-capturing groups and capturing groups all still match when
// unquantified.
assertEqArray(/.B(?=A)/u.exec("cBaCBA"),
["CB"]);
assertEqArray(/.B(?!A)/u.exec("CBAcBa"),
["cB"]);
assertEqArray(/.B(?:A)/u.exec("cBaCBA"),
["CBA"]);
assertEqArray(/.B(A)/u.exec("cBaCBA"),
["CBA", "A"]);
// A quantifier after a lookahead is rejected; after a non-capturing or
// capturing group it is still accepted.
assertThrowsInstanceOf(() => eval(`/.B(?=A)+/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/.B(?!A)+/u`), SyntaxError);
assertEqArray(/.B(?:A)+/u.exec("cBaCBA"),
["CBA"]);
assertEqArray(/.B(A)+/u.exec("cBaCBA"),
["CBA", "A"]);
// ControlLetter
// \c followed by an ASCII letter of either case maps to the corresponding
// control character, both outside and inside a character class.
assertEqArray(/\cA/u.exec("\u0001"),
["\u0001"]);
assertEqArray(/\cZ/u.exec("\u001a"),
["\u001a"]);
assertEqArray(/\ca/u.exec("\u0001"),
["\u0001"]);
assertEqArray(/\cz/u.exec("\u001a"),
["\u001a"]);
assertEqArray(/[\cA]/u.exec("\u0001"),
["\u0001"]);
assertEqArray(/[\cZ]/u.exec("\u001a"),
["\u001a"]);
assertEqArray(/[\ca]/u.exec("\u0001"),
["\u0001"]);
assertEqArray(/[\cz]/u.exec("\u001a"),
["\u001a"]);
// \c with no letter, with a digit, or with an underscore is rejected, both
// outside and inside a character class.
assertThrowsInstanceOf(() => eval(`/\\c/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\c1/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\c_/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/[\\c]/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/[\\c1]/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/[\\c_]/u`), SyntaxError);
// HexEscapeSequence
// \x not followed by two hexadecimal digits is rejected rather than being
// read as a literal "x".
assertThrowsInstanceOf(() => eval(`/\\x/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\x0/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\x1/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\x1G/u`), SyntaxError);
// LegacyOctalEscapeSequence
// Octal escapes are rejected, with or without a leading zero.
assertThrowsInstanceOf(() => eval(`/\\52/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\052/u`), SyntaxError);
// DecimalEscape
// \0 matches U+0000 when it is not followed by a decimal digit, including
// inside a character class and when followed by a letter.
assertEqArray(/\0/u.exec("\0"),
["\0"]);
assertEqArray(/[\0]/u.exec("\0"),
["\0"]);
assertEqArray(/\0A/u.exec("\0A"),
["\0A"]);
assertEqArray(/\0G/u.exec("\0G"),
["\0G"]);
// Back references to groups that exist keep working, including the
// two-digit reference \10 when at least ten groups are present.
assertEqArray(/(A.)\1/u.exec("ABACABAB"),
["ABAB", "AB"]);
assertEqArray(/(A.)(B.)(C.)(D.)(E.)(F.)(G.)(H.)(I.)(J.)(K.)\10/u.exec("A1B2C3D4E5F6G7H8I9JaKbJa"),
["A1B2C3D4E5F6G7H8I9JaKbJa", "A1", "B2", "C3", "D4", "E5", "F6", "G7", "H8", "I9", "Ja", "Kb"]);
// \0 followed by another decimal digit is rejected.
assertThrowsInstanceOf(() => eval(`/\\00/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\01/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\09/u`), SyntaxError);
// A decimal escape in a pattern with no capture groups is rejected instead
// of being read as an octal or identity escape.
assertThrowsInstanceOf(() => eval(`/\\1/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\2/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\3/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\4/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\5/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\6/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\7/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\8/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\9/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\10/u`), SyntaxError);
// Call reportCompare only when the surrounding harness defines it.
if (typeof reportCompare === "function")
reportCompare(true, true);

View File

@ -29,11 +29,11 @@ namespace js {
*
* https://developer.mozilla.org/en-US/docs/SpiderMonkey/Internals/Bytecode
*/
static const uint32_t XDR_BYTECODE_VERSION_SUBTRAHEND = 334;
static const uint32_t XDR_BYTECODE_VERSION_SUBTRAHEND = 335;
static const uint32_t XDR_BYTECODE_VERSION =
uint32_t(0xb973c0de - XDR_BYTECODE_VERSION_SUBTRAHEND);
static_assert(JSErr_Limit == 429,
static_assert(JSErr_Limit == 433,
"GREETINGS, POTENTIAL SUBTRAHEND INCREMENTER! If you added or "
"removed MSG_DEFs from js.msg, you should increment "
"XDR_BYTECODE_VERSION_SUBTRAHEND and update this assertion's "