mirror of
https://github.com/classilla/tenfourfox.git
synced 2025-01-03 20:30:00 +00:00
#393, Bug 1135377 - Part 8: Disallow extended pattern in RegExp with unicode flag. r=till, f=anba
This commit is contained in:
parent
f31a9f9e84
commit
c05db4075d
@ -405,6 +405,31 @@ NegativeLookahead(LifoAlloc* alloc, char16_t from, char16_t to)
|
||||
return alloc->newInfallible<RegExpLookahead>(RangeAtom(alloc, from, to), false, 0, 0);
|
||||
}
|
||||
|
||||
static bool
|
||||
IsSyntaxCharacter(widechar c)
|
||||
{
|
||||
switch (c) {
|
||||
case '^':
|
||||
case '$':
|
||||
case '\\':
|
||||
case '.':
|
||||
case '*':
|
||||
case '+':
|
||||
case '?':
|
||||
case '(':
|
||||
case ')':
|
||||
case '[':
|
||||
case ']':
|
||||
case '{':
|
||||
case '}':
|
||||
case '|':
|
||||
case '/':
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
// Currently only used in an assert.
|
||||
static bool
|
||||
@ -459,16 +484,23 @@ RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code)
|
||||
widechar controlLetter = Next();
|
||||
widechar letter = controlLetter & ~('A' ^ 'a');
|
||||
// For compatibility with JSC, inside a character class
|
||||
// we also accept digits and underscore as control characters.
|
||||
if ((controlLetter >= '0' && controlLetter <= '9') ||
|
||||
controlLetter == '_' ||
|
||||
(letter >= 'A' && letter <= 'Z')) {
|
||||
// we also accept digits and underscore as control characters,
|
||||
// but only in non-unicode mode
|
||||
if ((!unicode_ &&
|
||||
((controlLetter >= '0' && controlLetter <= '9') ||
|
||||
controlLetter == '_')) ||
|
||||
(letter >= 'A' && letter <= 'Z'))
|
||||
{
|
||||
Advance(2);
|
||||
// Control letters mapped to ASCII control characters in the range
|
||||
// 0x00-0x1f.
|
||||
*code = controlLetter & 0x1f;
|
||||
return true;
|
||||
}
|
||||
if (unicode_) {
|
||||
ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
|
||||
return false;
|
||||
}
|
||||
// We match JSC in reading the backslash as a literal
|
||||
// character instead of as starting an escape.
|
||||
*code = '\\';
|
||||
@ -476,9 +508,18 @@ RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code)
|
||||
}
|
||||
case '0': case '1': case '2': case '3': case '4': case '5':
|
||||
case '6': case '7':
|
||||
// For compatibility, we interpret a decimal escape that isn't
|
||||
// a back reference (and therefore either \0 or not valid according
|
||||
// to the specification) as a 1..3 digit octal character code.
|
||||
if (unicode_) {
|
||||
if (current() == '0') {
|
||||
*code = 0;
|
||||
return true;
|
||||
}
|
||||
ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
|
||||
return false;
|
||||
}
|
||||
// For compatibility, outside of unicode mode, we interpret a decimal
|
||||
// escape that isn't a back reference (and therefore either \0 or not
|
||||
// valid according to the specification) as a 1..3 digit octal
|
||||
// character code.
|
||||
*code = ParseOctalLiteral();
|
||||
return true;
|
||||
case 'x': {
|
||||
@ -488,8 +529,12 @@ RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code)
|
||||
*code = value;
|
||||
return true;
|
||||
}
|
||||
if (unicode_) {
|
||||
ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
|
||||
return false;
|
||||
}
|
||||
// If \x is not followed by a two-digit hexadecimal, treat it
|
||||
// as an identity escape.
|
||||
// as an identity escape in non-unicode mode.
|
||||
*code = 'x';
|
||||
return true;
|
||||
}
|
||||
@ -527,10 +572,14 @@ RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code)
|
||||
return true;
|
||||
}
|
||||
default: {
|
||||
// Extended identity escape. We accept any character that hasn't
|
||||
// been matched by a more specific case, not just the subset required
|
||||
// by the ECMAScript specification.
|
||||
// Extended identity escape (non-unicode only). We accept any character
|
||||
// that hasn't been matched by a more specific case, not just the subset
|
||||
// required by the ECMAScript specification.
|
||||
widechar result = current();
|
||||
if (unicode_ && result != '-' && !IsSyntaxCharacter(result)) {
|
||||
ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
|
||||
return false;
|
||||
}
|
||||
Advance();
|
||||
*code = result;
|
||||
return true;
|
||||
@ -1388,6 +1437,8 @@ RegExpParser<CharT>::ParseDisjunction()
|
||||
capture_index);
|
||||
}
|
||||
builder->AddAtom(body);
|
||||
if (unicode_ && (group_type == POSITIVE_LOOKAHEAD || group_type == NEGATIVE_LOOKAHEAD))
|
||||
continue;
|
||||
// For compatibility with JSC and ES3, we allow quantifiers after
|
||||
// lookaheads, and break in all cases.
|
||||
break;
|
||||
@ -1527,6 +1578,8 @@ RegExpParser<CharT>::ParseDisjunction()
|
||||
builder->AddAtom(atom);
|
||||
break;
|
||||
}
|
||||
if (unicode_)
|
||||
return ReportError(JSMSG_BACK_REF_OUT_OF_RANGE);
|
||||
widechar first_digit = Next();
|
||||
if (first_digit == '8' || first_digit == '9') {
|
||||
// Treat as identity escape
|
||||
@ -1537,6 +1590,14 @@ RegExpParser<CharT>::ParseDisjunction()
|
||||
}
|
||||
// FALLTHROUGH
|
||||
case '0': {
|
||||
if (unicode_) {
|
||||
Advance(2);
|
||||
if (IsDecimalDigit(current()))
|
||||
return ReportError(JSMSG_INVALID_DECIMAL_ESCAPE);
|
||||
builder->AddCharacter(0);
|
||||
break;
|
||||
}
|
||||
|
||||
Advance();
|
||||
size_t octal = ParseOctalLiteral();
|
||||
builder->AddCharacter(octal);
|
||||
@ -1571,6 +1632,8 @@ RegExpParser<CharT>::ParseDisjunction()
|
||||
// Convert lower case letters to uppercase.
|
||||
widechar letter = controlLetter & ~('a' ^ 'A');
|
||||
if (letter < 'A' || 'Z' < letter) {
|
||||
if (unicode_)
|
||||
return ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
|
||||
// controlLetter is not in range 'A'-'Z' or 'a'-'z'.
|
||||
// This is outside the specification. We match JSC in
|
||||
// reading the backslash as a literal character instead
|
||||
@ -1588,6 +1651,8 @@ RegExpParser<CharT>::ParseDisjunction()
|
||||
if (ParseHexEscape(2, &value)) {
|
||||
builder->AddCharacter(value);
|
||||
} else {
|
||||
if (unicode_)
|
||||
return ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
|
||||
builder->AddCharacter('x');
|
||||
}
|
||||
break;
|
||||
@ -1639,12 +1704,16 @@ RegExpParser<CharT>::ParseDisjunction()
|
||||
}
|
||||
default:
|
||||
// Identity escape.
|
||||
if (unicode_ && !IsSyntaxCharacter(Next()))
|
||||
return ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
|
||||
builder->AddCharacter(Next());
|
||||
Advance(2);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '{': {
|
||||
if (unicode_)
|
||||
return ReportError(JSMSG_RAW_BRACE_IN_REGEP);
|
||||
int dummy;
|
||||
if (ParseIntervalQuantifier(&dummy, &dummy))
|
||||
return ReportError(JSMSG_NOTHING_TO_REPEAT);
|
||||
@ -1661,6 +1730,10 @@ RegExpParser<CharT>::ParseDisjunction()
|
||||
builder->AddAtom(LeadSurrogateAtom(alloc, c));
|
||||
else if (unicode::IsTrailSurrogate(c))
|
||||
builder->AddAtom(TrailSurrogateAtom(alloc, c));
|
||||
else if (c == ']')
|
||||
return ReportError(JSMSG_RAW_BRACKET_IN_REGEP);
|
||||
else if (c == '}')
|
||||
return ReportError(JSMSG_RAW_BRACE_IN_REGEP);
|
||||
else
|
||||
builder->AddCharacter(c);
|
||||
Advance();
|
||||
|
@ -447,8 +447,10 @@ MSG_DEF(JSMSG_INVALID_TIME_ZONE, 1, JSEXN_RANGEERR, "invalid time zone in
|
||||
MSG_DEF(JSMSG_UNDEFINED_CURRENCY, 0, JSEXN_TYPEERR, "undefined currency in NumberFormat() with currency style")
|
||||
|
||||
// RegExp
|
||||
MSG_DEF(JSMSG_BACK_REF_OUT_OF_RANGE, 0, JSEXN_SYNTAXERR, "back reference out of range in regular expression")
|
||||
MSG_DEF(JSMSG_BAD_CLASS_RANGE, 0, JSEXN_SYNTAXERR, "invalid range in character class")
|
||||
MSG_DEF(JSMSG_ESCAPE_AT_END_OF_REGEXP, 0, JSEXN_SYNTAXERR, "\\ at end of pattern")
|
||||
MSG_DEF(JSMSG_INVALID_DECIMAL_ESCAPE, 0, JSEXN_SYNTAXERR, "invalid decimal escape in regular expression")
|
||||
MSG_DEF(JSMSG_INVALID_GROUP, 0, JSEXN_SYNTAXERR, "invalid regexp group")
|
||||
MSG_DEF(JSMSG_INVALID_IDENTITY_ESCAPE, 0, JSEXN_SYNTAXERR, "invalid identity escape in regular expression")
|
||||
MSG_DEF(JSMSG_INVALID_UNICODE_ESCAPE, 0, JSEXN_SYNTAXERR, "invalid unicode escape in regular expression")
|
||||
@ -457,6 +459,8 @@ MSG_DEF(JSMSG_NEWREGEXP_FLAGGED, 0, JSEXN_TYPEERR, "can't supply flags whe
|
||||
MSG_DEF(JSMSG_NOTHING_TO_REPEAT, 0, JSEXN_SYNTAXERR, "nothing to repeat")
|
||||
MSG_DEF(JSMSG_NUMBERS_OUT_OF_ORDER, 0, JSEXN_SYNTAXERR, "numbers out of order in {} quantifier.")
|
||||
MSG_DEF(JSMSG_RANGE_WITH_CLASS_ESCAPE, 0, JSEXN_SYNTAXERR, "character class escape cannot be used in class range in regular expression")
|
||||
MSG_DEF(JSMSG_RAW_BRACE_IN_REGEP, 0, JSEXN_SYNTAXERR, "raw brace is not allowed in regular expression with unicode flag")
|
||||
MSG_DEF(JSMSG_RAW_BRACKET_IN_REGEP, 0, JSEXN_SYNTAXERR, "raw bracket is not allowed in regular expression with unicode flag")
|
||||
MSG_DEF(JSMSG_TOO_MANY_PARENS, 0, JSEXN_INTERNALERR, "too many parentheses in regular expression")
|
||||
MSG_DEF(JSMSG_UNICODE_OVERFLOW, 0, JSEXN_SYNTAXERR, "unicode codepoint should not be greater than 0x10FFFF in regular expression")
|
||||
MSG_DEF(JSMSG_UNMATCHED_RIGHT_PAREN, 0, JSEXN_SYNTAXERR, "unmatched ) in regular expression")
|
||||
|
117
js/src/tests/ecma_6/RegExp/unicode-disallow-extended.js
Normal file
117
js/src/tests/ecma_6/RegExp/unicode-disallow-extended.js
Normal file
@ -0,0 +1,117 @@
|
||||
var BUGNUMBER = 1135377;
|
||||
var summary = "Implement RegExp unicode flag -- disallow extended patterns.";
|
||||
|
||||
print(BUGNUMBER + ": " + summary);
|
||||
|
||||
// IdentityEscape
|
||||
|
||||
assertEqArray(/\^\$\\\.\*\+\?\(\)\[\]\{\}\|/u.exec("^$\\.*+?()[]{}|"),
|
||||
["^$\\.*+?()[]{}|"]);
|
||||
assertThrowsInstanceOf(() => eval(`/\\A/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\-/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\U{10}/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\U0000/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\uD83D\\U0000/u`), SyntaxError);
|
||||
|
||||
assertEqArray(/[\^\$\\\.\*\+\?\(\)\[\]\{\}\|]+/u.exec("^$\\.*+?()[]{}|"),
|
||||
["^$\\.*+?()[]{}|"]);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\A]/u`), SyntaxError);
|
||||
assertEqArray(/[A\-Z]+/u.exec("a-zABC"),
|
||||
["-"]);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\U{10}]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\U0000]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\uD83D\\U0000]/u`), SyntaxError);
|
||||
|
||||
// PatternCharacter
|
||||
assertThrowsInstanceOf(() => eval(`/{}/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/{/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/}/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/{0}/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/{1,}/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/{1,2}/u`), SyntaxError);
|
||||
|
||||
// QuantifiableAssertion
|
||||
assertEqArray(/.B(?=A)/u.exec("cBaCBA"),
|
||||
["CB"]);
|
||||
assertEqArray(/.B(?!A)/u.exec("CBAcBa"),
|
||||
["cB"]);
|
||||
assertEqArray(/.B(?:A)/u.exec("cBaCBA"),
|
||||
["CBA"]);
|
||||
assertEqArray(/.B(A)/u.exec("cBaCBA"),
|
||||
["CBA", "A"]);
|
||||
|
||||
assertThrowsInstanceOf(() => eval(`/.B(?=A)+/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/.B(?!A)+/u`), SyntaxError);
|
||||
assertEqArray(/.B(?:A)+/u.exec("cBaCBA"),
|
||||
["CBA"]);
|
||||
assertEqArray(/.B(A)+/u.exec("cBaCBA"),
|
||||
["CBA", "A"]);
|
||||
|
||||
// ControlLetter
|
||||
assertEqArray(/\cA/u.exec("\u0001"),
|
||||
["\u0001"]);
|
||||
assertEqArray(/\cZ/u.exec("\u001a"),
|
||||
["\u001a"]);
|
||||
assertEqArray(/\ca/u.exec("\u0001"),
|
||||
["\u0001"]);
|
||||
assertEqArray(/\cz/u.exec("\u001a"),
|
||||
["\u001a"]);
|
||||
|
||||
assertEqArray(/[\cA]/u.exec("\u0001"),
|
||||
["\u0001"]);
|
||||
assertEqArray(/[\cZ]/u.exec("\u001a"),
|
||||
["\u001a"]);
|
||||
assertEqArray(/[\ca]/u.exec("\u0001"),
|
||||
["\u0001"]);
|
||||
assertEqArray(/[\cz]/u.exec("\u001a"),
|
||||
["\u001a"]);
|
||||
|
||||
assertThrowsInstanceOf(() => eval(`/\\c/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\c1/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\c_/u`), SyntaxError);
|
||||
|
||||
assertThrowsInstanceOf(() => eval(`/[\\c]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\c1]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\c_]/u`), SyntaxError);
|
||||
|
||||
// HexEscapeSequence
|
||||
assertThrowsInstanceOf(() => eval(`/\\x/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\x0/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\x1/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\x1G/u`), SyntaxError);
|
||||
|
||||
// LegacyOctalEscapeSequence
|
||||
assertThrowsInstanceOf(() => eval(`/\\52/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\052/u`), SyntaxError);
|
||||
|
||||
// DecimalEscape
|
||||
assertEqArray(/\0/u.exec("\0"),
|
||||
["\0"]);
|
||||
assertEqArray(/[\0]/u.exec("\0"),
|
||||
["\0"]);
|
||||
assertEqArray(/\0A/u.exec("\0A"),
|
||||
["\0A"]);
|
||||
assertEqArray(/\0G/u.exec("\0G"),
|
||||
["\0G"]);
|
||||
assertEqArray(/(A.)\1/u.exec("ABACABAB"),
|
||||
["ABAB", "AB"]);
|
||||
assertEqArray(/(A.)(B.)(C.)(D.)(E.)(F.)(G.)(H.)(I.)(J.)(K.)\10/u.exec("A1B2C3D4E5F6G7H8I9JaKbJa"),
|
||||
["A1B2C3D4E5F6G7H8I9JaKbJa", "A1", "B2", "C3", "D4", "E5", "F6", "G7", "H8", "I9", "Ja", "Kb"]);
|
||||
|
||||
assertThrowsInstanceOf(() => eval(`/\\00/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\01/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\09/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\1/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\2/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\3/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\4/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\5/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\6/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\7/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\8/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\9/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\10/u`), SyntaxError);
|
||||
|
||||
if (typeof reportCompare === "function")
|
||||
reportCompare(true, true);
|
@ -29,11 +29,11 @@ namespace js {
|
||||
*
|
||||
* https://developer.mozilla.org/en-US/docs/SpiderMonkey/Internals/Bytecode
|
||||
*/
|
||||
static const uint32_t XDR_BYTECODE_VERSION_SUBTRAHEND = 334;
|
||||
static const uint32_t XDR_BYTECODE_VERSION_SUBTRAHEND = 335;
|
||||
static const uint32_t XDR_BYTECODE_VERSION =
|
||||
uint32_t(0xb973c0de - XDR_BYTECODE_VERSION_SUBTRAHEND);
|
||||
|
||||
static_assert(JSErr_Limit == 429,
|
||||
static_assert(JSErr_Limit == 433,
|
||||
"GREETINGS, POTENTIAL SUBTRAHEND INCREMENTER! If you added or "
|
||||
"removed MSG_DEFs from js.msg, you should increment "
|
||||
"XDR_BYTECODE_VERSION_SUBTRAHEND and update this assertion's "
|
||||
|
Loading…
Reference in New Issue
Block a user