mirror of
https://github.com/classilla/tenfourfox.git
synced 2025-01-19 07:31:25 +00:00
#393, Bug 1135377 - Part 2: Parse RegExp unicode character in non-CharacterClass. r=till, f=anba
This commit is contained in:
parent
d00063089b
commit
c85a176bd8
@ -138,7 +138,8 @@ class RegExpAssertion : public RegExpTree {
|
||||
END_OF_LINE,
|
||||
END_OF_INPUT,
|
||||
BOUNDARY,
|
||||
NON_BOUNDARY
|
||||
NON_BOUNDARY,
|
||||
NOT_AFTER_LEAD_SURROGATE
|
||||
};
|
||||
explicit RegExpAssertion(AssertionType type) : assertion_type_(type) { }
|
||||
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
||||
|
@ -2061,6 +2061,8 @@ RegExpAssertion::ToNode(RegExpCompiler* compiler,
|
||||
result->AddAlternative(end_alternative);
|
||||
return result;
|
||||
}
|
||||
case NOT_AFTER_LEAD_SURROGATE:
|
||||
return AssertionNode::NotAfterLeadSurrogate(on_success);
|
||||
default:
|
||||
MOZ_CRASH("Bad assertion type");
|
||||
}
|
||||
@ -2848,6 +2850,31 @@ EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace)
|
||||
on_success->Emit(compiler, &new_trace);
|
||||
}
|
||||
|
||||
// Assert that the next character cannot be a part of a surrogate pair.
|
||||
static void
|
||||
EmitNotAfterLeadSurrogate(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace)
|
||||
{
|
||||
RegExpMacroAssembler* assembler = compiler->macro_assembler();
|
||||
|
||||
// We will be loading the previous character into the current character
|
||||
// register.
|
||||
Trace new_trace(*trace);
|
||||
new_trace.InvalidateCurrentCharacter();
|
||||
|
||||
jit::Label ok;
|
||||
if (new_trace.cp_offset() == 0)
|
||||
assembler->CheckAtStart(&ok);
|
||||
|
||||
// We already checked that we are not at the start of input so it must be
|
||||
// OK to load the previous character.
|
||||
assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, new_trace.backtrack(), false);
|
||||
assembler->CheckCharacterInRange(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax,
|
||||
new_trace.backtrack());
|
||||
|
||||
assembler->Bind(&ok);
|
||||
on_success->Emit(compiler, &new_trace);
|
||||
}
|
||||
|
||||
// Check for [0-9A-Z_a-z].
|
||||
static void
|
||||
EmitWordCheck(RegExpMacroAssembler* assembler,
|
||||
@ -3001,6 +3028,9 @@ AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace)
|
||||
EmitBoundaryCheck(compiler, trace);
|
||||
return;
|
||||
}
|
||||
case NOT_AFTER_LEAD_SURROGATE:
|
||||
EmitNotAfterLeadSurrogate(compiler, on_success(), trace);
|
||||
return;
|
||||
}
|
||||
on_success()->Emit(compiler, trace);
|
||||
}
|
||||
|
@ -788,7 +788,8 @@ class AssertionNode : public SeqRegExpNode
|
||||
AT_START,
|
||||
AT_BOUNDARY,
|
||||
AT_NON_BOUNDARY,
|
||||
AFTER_NEWLINE
|
||||
AFTER_NEWLINE,
|
||||
NOT_AFTER_LEAD_SURROGATE
|
||||
};
|
||||
AssertionNode(AssertionType t, RegExpNode* on_success)
|
||||
: SeqRegExpNode(on_success), assertion_type_(t)
|
||||
@ -809,6 +810,10 @@ class AssertionNode : public SeqRegExpNode
|
||||
// Creates an AFTER_NEWLINE assertion node that continues with |on_success|.
// The node is allocated from |on_success|'s allocator.
static AssertionNode* AfterNewline(RegExpNode* on_success) {
    LifoAlloc* nodeAlloc = on_success->alloc();
    return nodeAlloc->newInfallible<AssertionNode>(AFTER_NEWLINE, on_success);
}
|
||||
// Creates a NOT_AFTER_LEAD_SURROGATE assertion node that continues with
// |on_success|. The node is allocated from |on_success|'s allocator.
static AssertionNode* NotAfterLeadSurrogate(RegExpNode* on_success) {
    LifoAlloc* nodeAlloc = on_success->alloc();
    return nodeAlloc->newInfallible<AssertionNode>(NOT_AFTER_LEAD_SURROGATE, on_success);
}
|
||||
virtual void Accept(NodeVisitor* visitor);
|
||||
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
|
||||
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
|
||||
|
@ -302,6 +302,108 @@ RegExpParser<CharT>::ParseHexEscape(int length, size_t* value)
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
bool
|
||||
RegExpParser<CharT>::ParseBracedHexEscape(size_t* value)
|
||||
{
|
||||
MOZ_ASSERT(current() == '{');
|
||||
Advance();
|
||||
|
||||
bool first = true;
|
||||
uint32_t code = 0;
|
||||
while (true) {
|
||||
widechar c = current();
|
||||
if (c == kEndMarker) {
|
||||
ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
|
||||
return false;
|
||||
}
|
||||
if (c == '}') {
|
||||
if (first) {
|
||||
ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
|
||||
return false;
|
||||
}
|
||||
Advance();
|
||||
break;
|
||||
}
|
||||
|
||||
int d = HexValue(c);
|
||||
if (d < 0) {
|
||||
ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
|
||||
return false;
|
||||
}
|
||||
code = (code << 4) | d;
|
||||
if (code > unicode::NonBMPMax) {
|
||||
ReportError(JSMSG_UNICODE_OVERFLOW);
|
||||
return false;
|
||||
}
|
||||
Advance();
|
||||
first = false;
|
||||
}
|
||||
|
||||
*value = code;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
bool
|
||||
RegExpParser<CharT>::ParseTrailSurrogate(size_t* value)
|
||||
{
|
||||
if (current() != '\\')
|
||||
return false;
|
||||
|
||||
const CharT* start = position();
|
||||
Advance();
|
||||
if (current() != 'u') {
|
||||
Reset(start);
|
||||
return false;
|
||||
}
|
||||
Advance();
|
||||
if (!ParseHexEscape(4, value)) {
|
||||
Reset(start);
|
||||
return false;
|
||||
}
|
||||
if (!unicode::IsTrailSurrogate(*value)) {
|
||||
Reset(start);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
bool
|
||||
RegExpParser<CharT>::ParseRawSurrogatePair(char16_t* lead, char16_t* trail)
|
||||
{
|
||||
widechar c1 = current();
|
||||
if (!unicode::IsLeadSurrogate(c1))
|
||||
return false;
|
||||
|
||||
const CharT* start = position();
|
||||
Advance();
|
||||
widechar c2 = current();
|
||||
if (!unicode::IsTrailSurrogate(c2)) {
|
||||
Reset(start);
|
||||
return false;
|
||||
}
|
||||
Advance();
|
||||
*lead = c1;
|
||||
*trail = c2;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Builds a character class tree whose only range is [from, to]; everything
// is allocated from |alloc|.
// NOTE(review): the |false| passed to RegExpCharacterClass is presumably the
// "negated" flag -- confirm against its constructor.
static inline RegExpTree*
RangeAtom(LifoAlloc* alloc, char16_t from, char16_t to)
{
    CharacterRangeVector* rangeList = alloc->newInfallible<CharacterRangeVector>(*alloc);
    rangeList->append(CharacterRange::Range(from, to));

    RegExpTree* charClass = alloc->newInfallible<RegExpCharacterClass>(rangeList, false);
    return charClass;
}
|
||||
|
||||
// Builds a lookahead tree whose body is the character range [from, to],
// allocated from |alloc|.
// NOTE(review): the |false, 0, 0| arguments are presumably "not positive"
// plus an empty capture range -- confirm against RegExpLookahead's
// constructor.
static inline RegExpTree*
NegativeLookahead(LifoAlloc* alloc, char16_t from, char16_t to)
{
    RegExpTree* body = RangeAtom(alloc, from, to);
    return alloc->newInfallible<RegExpLookahead>(body, false, 0, 0);
}
|
||||
|
||||
#ifdef DEBUG
|
||||
// Currently only used in an assertion.
|
||||
static bool
|
||||
@ -675,6 +777,35 @@ RegExpParser<CharT>::ParsePattern()
|
||||
return result;
|
||||
}
|
||||
|
||||
// Builds the tree for a full surrogate pair: the code unit |lead| followed
// immediately by the code unit |trail|.
static inline RegExpTree*
SurrogatePairAtom(LifoAlloc* alloc, char16_t lead, char16_t trail)
{
    RegExpBuilder* pairBuilder = alloc->newInfallible<RegExpBuilder>(alloc);

    pairBuilder->AddCharacter(lead);
    pairBuilder->AddCharacter(trail);

    return pairBuilder->ToRegExp();
}
|
||||
|
||||
// Builds the tree for a lone lead surrogate: the code unit |value| followed
// by a lookahead over the trail surrogate range, so that the lead half of a
// complete surrogate pair is not matched on its own.
static inline RegExpTree*
LeadSurrogateAtom(LifoAlloc* alloc, char16_t value)
{
    RegExpBuilder* loneLead = alloc->newInfallible<RegExpBuilder>(alloc);
    loneLead->AddCharacter(value);

    RegExpTree* notBeforeTrail = NegativeLookahead(alloc,
                                                   unicode::TrailSurrogateMin,
                                                   unicode::TrailSurrogateMax);
    loneLead->AddAtom(notBeforeTrail);

    return loneLead->ToRegExp();
}
|
||||
|
||||
// Builds the tree for a lone trail surrogate: a NOT_AFTER_LEAD_SURROGATE
// assertion followed by the code unit |value|, so that the trail half of a
// complete surrogate pair is not matched on its own.
static inline RegExpTree*
TrailSurrogateAtom(LifoAlloc* alloc, char16_t value)
{
    RegExpBuilder* loneTrail = alloc->newInfallible<RegExpBuilder>(alloc);

    RegExpAssertion* notAfterLead =
        alloc->newInfallible<RegExpAssertion>(RegExpAssertion::NOT_AFTER_LEAD_SURROGATE);
    loneTrail->AddAssertion(notAfterLead);
    loneTrail->AddCharacter(value);

    return loneTrail->ToRegExp();
}
|
||||
|
||||
// Disjunction ::
|
||||
// Alternative
|
||||
// Alternative | Disjunction
|
||||
@ -929,6 +1060,38 @@ RegExpParser<CharT>::ParseDisjunction()
|
||||
case 'u': {
|
||||
Advance(2);
|
||||
size_t value;
|
||||
if (unicode_) {
|
||||
if (current() == '{') {
|
||||
if (!ParseBracedHexEscape(&value))
|
||||
return nullptr;
|
||||
if (unicode::IsLeadSurrogate(value)) {
|
||||
builder->AddAtom(LeadSurrogateAtom(alloc, value));
|
||||
} else if (unicode::IsTrailSurrogate(value)) {
|
||||
builder->AddAtom(TrailSurrogateAtom(alloc, value));
|
||||
} else if (value >= unicode::NonBMPMin) {
|
||||
size_t lead, trail;
|
||||
unicode::UTF16Encode(value, &lead, &trail);
|
||||
builder->AddAtom(SurrogatePairAtom(alloc, lead, trail));
|
||||
} else {
|
||||
builder->AddCharacter(value);
|
||||
}
|
||||
} else if (ParseHexEscape(4, &value)) {
|
||||
if (unicode::IsLeadSurrogate(value)) {
|
||||
size_t trail;
|
||||
if (ParseTrailSurrogate(&trail))
|
||||
builder->AddAtom(SurrogatePairAtom(alloc, value, trail));
|
||||
else
|
||||
builder->AddAtom(LeadSurrogateAtom(alloc, value));
|
||||
} else if (unicode::IsTrailSurrogate(value)) {
|
||||
builder->AddAtom(TrailSurrogateAtom(alloc, value));
|
||||
} else {
|
||||
builder->AddCharacter(value);
|
||||
}
|
||||
} else {
|
||||
return ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (ParseHexEscape(4, &value)) {
|
||||
builder->AddCharacter(value);
|
||||
} else {
|
||||
@ -950,6 +1113,22 @@ RegExpParser<CharT>::ParseDisjunction()
|
||||
// fallthrough
|
||||
}
|
||||
default:
|
||||
if (unicode_) {
|
||||
char16_t lead, trail;
|
||||
if (ParseRawSurrogatePair(&lead, &trail)) {
|
||||
builder->AddAtom(SurrogatePairAtom(alloc, lead, trail));
|
||||
} else {
|
||||
widechar c = current();
|
||||
if (unicode::IsLeadSurrogate(c))
|
||||
builder->AddAtom(LeadSurrogateAtom(alloc, c));
|
||||
else if (unicode::IsTrailSurrogate(c))
|
||||
builder->AddAtom(TrailSurrogateAtom(alloc, c));
|
||||
else
|
||||
builder->AddCharacter(c);
|
||||
Advance();
|
||||
}
|
||||
break;
|
||||
}
|
||||
builder->AddCharacter(current());
|
||||
Advance();
|
||||
break;
|
||||
|
@ -193,6 +193,10 @@ class RegExpParser
|
||||
// and sets the value if it is.
|
||||
bool ParseHexEscape(int length, size_t* value);
|
||||
|
||||
bool ParseBracedHexEscape(size_t* value);
|
||||
bool ParseTrailSurrogate(size_t* value);
|
||||
bool ParseRawSurrogatePair(char16_t* lead, char16_t* trail);
|
||||
|
||||
size_t ParseOctalLiteral();
|
||||
|
||||
// Tries to parse the input as a back reference. If successful it
|
||||
|
@ -450,11 +450,14 @@ MSG_DEF(JSMSG_UNDEFINED_CURRENCY, 0, JSEXN_TYPEERR, "undefined currency in
|
||||
MSG_DEF(JSMSG_BAD_CLASS_RANGE, 0, JSEXN_SYNTAXERR, "invalid range in character class")
|
||||
MSG_DEF(JSMSG_ESCAPE_AT_END_OF_REGEXP, 0, JSEXN_SYNTAXERR, "\\ at end of pattern")
|
||||
MSG_DEF(JSMSG_INVALID_GROUP, 0, JSEXN_SYNTAXERR, "invalid regexp group")
|
||||
MSG_DEF(JSMSG_INVALID_IDENTITY_ESCAPE, 0, JSEXN_SYNTAXERR, "invalid identity escape in regular expression")
|
||||
MSG_DEF(JSMSG_INVALID_UNICODE_ESCAPE, 0, JSEXN_SYNTAXERR, "invalid unicode escape in regular expression")
|
||||
MSG_DEF(JSMSG_MISSING_PAREN, 0, JSEXN_SYNTAXERR, "unterminated parenthetical")
|
||||
MSG_DEF(JSMSG_NEWREGEXP_FLAGGED, 0, JSEXN_TYPEERR, "can't supply flags when constructing one RegExp from another")
|
||||
MSG_DEF(JSMSG_NOTHING_TO_REPEAT, 0, JSEXN_SYNTAXERR, "nothing to repeat")
|
||||
MSG_DEF(JSMSG_NUMBERS_OUT_OF_ORDER, 0, JSEXN_SYNTAXERR, "numbers out of order in {} quantifier.")
|
||||
MSG_DEF(JSMSG_TOO_MANY_PARENS, 0, JSEXN_INTERNALERR, "too many parentheses in regular expression")
|
||||
MSG_DEF(JSMSG_UNICODE_OVERFLOW, 0, JSEXN_SYNTAXERR, "unicode codepoint should not be greater than 0x10FFFF in regular expression")
|
||||
MSG_DEF(JSMSG_UNMATCHED_RIGHT_PAREN, 0, JSEXN_SYNTAXERR, "unmatched ) in regular expression")
|
||||
MSG_DEF(JSMSG_UNTERM_CLASS, 0, JSEXN_SYNTAXERR, "unterminated character class")
|
||||
|
||||
|
166
js/src/tests/ecma_6/RegExp/unicode-braced.js
Normal file
166
js/src/tests/ecma_6/RegExp/unicode-braced.js
Normal file
@ -0,0 +1,166 @@
|
||||
// Regression test for bug 1135377: braced unicode escapes (\u{...}) in
// RegExp patterns, with and without the unicode ("u") flag.
// BUGNUMBER and summary are only used here to print a banner line.
var BUGNUMBER = 1135377;
|
||||
var summary = "Implement RegExp unicode flag -- braced pattern in RegExpUnicodeEscapeSequence.";
|
||||
|
||||
print(BUGNUMBER + ": " + summary);
|
||||
|
||||
// ==== standalone ====
|
||||
|
||||
assertEqArray(/\u{41}/u.exec("ABC"),
|
||||
["A"]);
|
||||
assertEqArray(/\u{41}/.exec("ABC" + "u".repeat(41)),
|
||||
["u".repeat(41)]);
|
||||
|
||||
assertEqArray(/\u{4A}/u.exec("JKL"),
|
||||
["J"]);
|
||||
assertEqArray(/\u{4A}/.exec("JKLu{4A}"),
|
||||
["u{4A}"]);
|
||||
|
||||
assertEqArray(/\u{1F438}/u.exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/\u{1F438}/.exec("u{1F438}"),
|
||||
["u{1F438}"]);
|
||||
|
||||
assertEqArray(/\u{0}/u.exec("\u{0}"),
|
||||
["\u{0}"]);
|
||||
assertEqArray(/\u{10FFFF}/u.exec("\u{10FFFF}"),
|
||||
["\u{10FFFF}"]);
|
||||
assertEqArray(/\u{10ffff}/u.exec("\u{10FFFF}"),
|
||||
["\u{10FFFF}"]);
|
||||
|
||||
// leading 0
|
||||
assertEqArray(/\u{0000000000000000000000}/u.exec("\u{0}"),
|
||||
["\u{0}"]);
|
||||
assertEqArray(/\u{000000000000000010FFFF}/u.exec("\u{10FFFF}"),
|
||||
["\u{10FFFF}"]);
|
||||
|
||||
// RegExp constructor
|
||||
assertEqArray(new RegExp("\\u{0}", "u").exec("\u{0}"),
|
||||
["\u{0}"]);
|
||||
assertEqArray(new RegExp("\\u{41}", "u").exec("ABC"),
|
||||
["A"]);
|
||||
assertEqArray(new RegExp("\\u{1F438}", "u").exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(new RegExp("\\u{10FFFF}", "u").exec("\u{10FFFF}"),
|
||||
["\u{10FFFF}"]);
|
||||
|
||||
assertEqArray(new RegExp("\\u{0000000000000000}", "u").exec("\u{0}"),
|
||||
["\u{0}"]);
|
||||
|
||||
assertEqArray(eval(`/\\u{${"0".repeat(Math.pow(2, 24)) + "1234"}}/u`).exec("\u{1234}"),
|
||||
["\u{1234}"]);
|
||||
assertEqArray(new RegExp(`\\u{${"0".repeat(Math.pow(2, 24)) + "1234"}}`, "u").exec("\u{1234}"),
|
||||
["\u{1234}"]);
|
||||
|
||||
// ==== ? ====
|
||||
|
||||
assertEqArray(/\u{1F438}?/u.exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/\u{1F438}?/u.exec(""),
|
||||
[""]);
|
||||
|
||||
// lead-only target
|
||||
assertEqArray(/\u{1F438}?/u.exec("\uD83D"),
|
||||
[""]);
|
||||
|
||||
// RegExp constructor
|
||||
assertEqArray(new RegExp("\\u{1F438}?", "u").exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(new RegExp("\\u{1F438}?", "u").exec(""),
|
||||
[""]);
|
||||
assertEqArray(new RegExp("\\u{1F438}?", "u").exec("\uD83D"),
|
||||
[""]);
|
||||
|
||||
// ==== + ====
|
||||
|
||||
assertEqArray(/\u{1F438}+/u.exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/\u{1F438}+/u.exec("\u{1F438}\u{1F438}"),
|
||||
["\u{1F438}\u{1F438}"]);
|
||||
assertEq(/\u{1F438}+/u.exec(""),
|
||||
null);
|
||||
|
||||
// lead-only target
|
||||
assertEq(/\u{1F438}+/u.exec("\uD83D"),
|
||||
null);
|
||||
assertEqArray(/\u{1F438}+/u.exec("\uD83D\uDC38\uDC38"),
|
||||
["\uD83D\uDC38"]);
|
||||
|
||||
// ==== * ====
|
||||
|
||||
assertEqArray(/\u{1F438}*/u.exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/\u{1F438}*/u.exec("\u{1F438}\u{1F438}"),
|
||||
["\u{1F438}\u{1F438}"]);
|
||||
assertEqArray(/\u{1F438}*/u.exec(""),
|
||||
[""]);
|
||||
|
||||
// lead-only target
|
||||
assertEqArray(/\u{1F438}*/u.exec("\uD83D"),
|
||||
[""]);
|
||||
assertEqArray(/\u{1F438}*/u.exec("\uD83D\uDC38\uDC38"),
|
||||
["\uD83D\uDC38"]);
|
||||
|
||||
// ==== lead-only ====
|
||||
|
||||
// match a lone lead only when it is not followed by a trail surrogate
|
||||
assertEqArray(/\u{D83D}/u.exec("\uD83D\uDBFF"),
|
||||
["\uD83D"]);
|
||||
assertEq(/\u{D83D}/u.exec("\uD83D\uDC00"),
|
||||
null);
|
||||
assertEq(/\u{D83D}/u.exec("\uD83D\uDFFF"),
|
||||
null);
|
||||
assertEqArray(/\u{D83D}/u.exec("\uD83D\uE000"),
|
||||
["\uD83D"]);
|
||||
|
||||
// match at end of input or before a non-trail char
|
||||
assertEqArray(/\u{D83D}/u.exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/\u{D83D}/u.exec("\uD83DA"),
|
||||
["\uD83D"]);
|
||||
|
||||
// ==== trail-only ====
|
||||
|
||||
// match a lone trail only when it is not preceded by a lead surrogate
|
||||
assertEqArray(/\u{DC38}/u.exec("\uD7FF\uDC38"),
|
||||
["\uDC38"]);
|
||||
assertEq(/\u{DC38}/u.exec("\uD800\uDC38"),
|
||||
null);
|
||||
assertEq(/\u{DC38}/u.exec("\uDBFF\uDC38"),
|
||||
null);
|
||||
assertEqArray(/\u{DC38}/u.exec("\uDC00\uDC38"),
|
||||
["\uDC38"]);
|
||||
|
||||
// match after non-lead char
|
||||
assertEqArray(/\u{DC38}/u.exec("\uDC38"),
|
||||
["\uDC38"]);
|
||||
assertEqArray(/\u{DC38}/u.exec("A\uDC38"),
|
||||
["\uDC38"]);
|
||||
|
||||
// ==== wrong patterns ====
|
||||
|
||||
assertThrowsInstanceOf(() => eval(`/\\u{-1}/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u{0.0}/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u{G}/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u{}/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u{{/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u{/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u{110000}/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u{00110000}/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u{100000000000000000000000000000}/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u{ FFFF}/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u{FFFF }/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u{FF FF}/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u{F F F F}/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u{100000001}/u`), SyntaxError);
|
||||
|
||||
// surrogate pair with braced
|
||||
assertEq(/\u{D83D}\u{DC38}+/u.exec("\uD83D\uDC38\uDC38"),
|
||||
null);
|
||||
assertEq(/\uD83D\u{DC38}+/u.exec("\uD83D\uDC38\uDC38"),
|
||||
null);
|
||||
assertEq(/\u{D83D}\uDC38+/u.exec("\uD83D\uDC38\uDC38"),
|
||||
null);
|
||||
|
||||
if (typeof reportCompare === "function")
|
||||
reportCompare(true, true);
|
218
js/src/tests/ecma_6/RegExp/unicode-lead-trail.js
Normal file
218
js/src/tests/ecma_6/RegExp/unicode-lead-trail.js
Normal file
@ -0,0 +1,218 @@
|
||||
// Regression test for bug 1135377: \uXXXX lead/trail surrogate escapes in
// RegExp patterns, with and without the unicode ("u") flag.
// BUGNUMBER and summary are only used here to print a banner line.
var BUGNUMBER = 1135377;
|
||||
var summary = "Implement RegExp unicode flag -- lead and trail patterns in RegExpUnicodeEscapeSequence.";
|
||||
|
||||
print(BUGNUMBER + ": " + summary);
|
||||
|
||||
// ==== standalone ====
|
||||
|
||||
assertEqArray(/\uD83D\uDC38/u.exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
|
||||
// no unicode flag
|
||||
assertEqArray(/\uD83D\uDC38/.exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
|
||||
// RegExp constructor
|
||||
assertEqArray(new RegExp("\\uD83D\\uDC38", "u").exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
|
||||
// RegExp constructor, no unicode flag
|
||||
assertEqArray(new RegExp("\\uD83D\\uDC38", "").exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
|
||||
// ==== ? ====
|
||||
|
||||
assertEqArray(/\uD83D\uDC38?/u.exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/\uD83D\uDC38?/u.exec(""),
|
||||
[""]);
|
||||
|
||||
// lead-only target
|
||||
assertEqArray(/\uD83D\uDC38?/u.exec("\uD83D"),
|
||||
[""]);
|
||||
|
||||
// no unicode flag
|
||||
assertEqArray(/\uD83D\uDC38?/.exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEq(/\uD83D\uDC38?/.exec(""),
|
||||
null);
|
||||
|
||||
assertEqArray(/\uD83D\uDC38?/.exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
|
||||
// RegExp constructor
|
||||
assertEqArray(new RegExp("\\uD83D\\uDC38?", "u").exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(new RegExp("\\uD83D\\uDC38?", "u").exec(""),
|
||||
[""]);
|
||||
|
||||
assertEqArray(new RegExp("\\uD83D\\uDC38?", "u").exec("\uD83D"),
|
||||
[""]);
|
||||
|
||||
// RegExp constructor, no unicode flag
|
||||
assertEqArray(new RegExp("\\uD83D\\uDC38?", "").exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEq(new RegExp("\\uD83D\\uDC38?", "").exec(""),
|
||||
null);
|
||||
|
||||
assertEqArray(new RegExp("\\uD83D\\uDC38?", "").exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
|
||||
// ==== + ====
|
||||
|
||||
assertEqArray(/\uD83D\uDC38+/u.exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/\uD83D\uDC38+/u.exec("\u{1F438}\u{1F438}"),
|
||||
["\u{1F438}\u{1F438}"]);
|
||||
assertEq(/\uD83D\uDC38+/u.exec(""),
|
||||
null);
|
||||
|
||||
// lead-only target
|
||||
assertEq(/\uD83D\uDC38+/u.exec("\uD83D"),
|
||||
null);
|
||||
assertEqArray(/\uD83D\uDC38+/u.exec("\uD83D\uDC38\uDC38"),
|
||||
["\uD83D\uDC38"]);
|
||||
|
||||
// no unicode flag
|
||||
assertEqArray(/\uD83D\uDC38+/.exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/\uD83D\uDC38+/.exec("\u{1F438}\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEq(/\uD83D\uDC38+/.exec("\uD83D"),
|
||||
null);
|
||||
assertEqArray(/\uD83D\uDC38+/.exec("\uD83D\uDC38\uDC38"),
|
||||
["\uD83D\uDC38\uDC38"]);
|
||||
assertEq(/\uD83D\uDC38+/.exec(""),
|
||||
null);
|
||||
|
||||
// ==== * ====
|
||||
|
||||
assertEqArray(/\uD83D\uDC38*/u.exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/\uD83D\uDC38*/u.exec("\u{1F438}\u{1F438}"),
|
||||
["\u{1F438}\u{1F438}"]);
|
||||
assertEqArray(/\uD83D\uDC38*/u.exec(""),
|
||||
[""]);
|
||||
|
||||
// lead-only target
|
||||
assertEqArray(/\uD83D\uDC38*/u.exec("\uD83D"),
|
||||
[""]);
|
||||
assertEqArray(/\uD83D\uDC38*/u.exec("\uD83D\uDC38\uDC38"),
|
||||
["\uD83D\uDC38"]);
|
||||
|
||||
// no unicode flag
|
||||
assertEqArray(/\uD83D\uDC38*/.exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/\uD83D\uDC38*/.exec("\u{1F438}\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/\uD83D\uDC38*/.exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/\uD83D\uDC38*/.exec("\uD83D\uDC38\uDC38"),
|
||||
["\uD83D\uDC38\uDC38"]);
|
||||
assertEq(/\uD83D\uDC38*/.exec(""),
|
||||
null);
|
||||
|
||||
// ==== lead-only ====
|
||||
|
||||
// match a lone lead only when it is not followed by a trail surrogate
|
||||
assertEqArray(/\uD83D/u.exec("\uD83D\uDBFF"),
|
||||
["\uD83D"]);
|
||||
assertEq(/\uD83D/u.exec("\uD83D\uDC00"),
|
||||
null);
|
||||
assertEq(/\uD83D/u.exec("\uD83D\uDFFF"),
|
||||
null);
|
||||
assertEqArray(/\uD83D/u.exec("\uD83D\uE000"),
|
||||
["\uD83D"]);
|
||||
|
||||
// match at end of input or before a non-trail char
|
||||
assertEqArray(/\uD83D/u.exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/\uD83D/u.exec("\uD83DA"),
|
||||
["\uD83D"]);
|
||||
|
||||
// no unicode flag
|
||||
assertEqArray(/\uD83D/.exec("\uD83D\uDBFF"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/\uD83D/.exec("\uD83D\uDC00"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/\uD83D/.exec("\uD83D\uDFFF"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/\uD83D/.exec("\uD83D\uE000"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/\uD83D/.exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/\uD83D/.exec("\uD83DA"),
|
||||
["\uD83D"]);
|
||||
|
||||
// ==== trail-only ====
|
||||
|
||||
// match a lone trail only when it is not preceded by a lead surrogate
|
||||
assertEqArray(/\uDC38/u.exec("\uD7FF\uDC38"),
|
||||
["\uDC38"]);
|
||||
assertEq(/\uDC38/u.exec("\uD800\uDC38"),
|
||||
null);
|
||||
assertEq(/\uDC38/u.exec("\uDBFF\uDC38"),
|
||||
null);
|
||||
assertEqArray(/\uDC38/u.exec("\uDC00\uDC38"),
|
||||
["\uDC38"]);
|
||||
|
||||
// match after non-lead char
|
||||
assertEqArray(/\uDC38/u.exec("\uDC38"),
|
||||
["\uDC38"]);
|
||||
assertEqArray(/\uDC38/u.exec("A\uDC38"),
|
||||
["\uDC38"]);
|
||||
|
||||
// no unicode flag
|
||||
assertEqArray(/\uDC38/.exec("\uD7FF\uDC38"),
|
||||
["\uDC38"]);
|
||||
assertEqArray(/\uDC38/.exec("\uD800\uDC38"),
|
||||
["\uDC38"]);
|
||||
assertEqArray(/\uDC38/.exec("\uDBFF\uDC38"),
|
||||
["\uDC38"]);
|
||||
assertEqArray(/\uDC38/.exec("\uDC00\uDC38"),
|
||||
["\uDC38"]);
|
||||
assertEqArray(/\uDC38/.exec("\uDC38"),
|
||||
["\uDC38"]);
|
||||
assertEqArray(/\uDC38/.exec("A\uDC38"),
|
||||
["\uDC38"]);
|
||||
|
||||
// ==== invalid trail ====
|
||||
|
||||
assertEqArray(/\uD83D\u3042*/u.exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/\uD83D\u3042*/u.exec("\uD83D\u3042"),
|
||||
["\uD83D\u3042"]);
|
||||
assertEqArray(/\uD83D\u3042*/u.exec("\uD83D\u3042\u3042"),
|
||||
["\uD83D\u3042\u3042"]);
|
||||
|
||||
assertEqArray(/\uD83D\u{3042}*/u.exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/\uD83D\u{3042}*/u.exec("\uD83D\u3042"),
|
||||
["\uD83D\u3042"]);
|
||||
assertEqArray(/\uD83D\u{3042}*/u.exec("\uD83D\u3042\u3042"),
|
||||
["\uD83D\u3042\u3042"]);
|
||||
|
||||
assertEqArray(/\uD83DA*/u.exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/\uD83DA*/u.exec("\uD83DA"),
|
||||
["\uD83DA"]);
|
||||
assertEqArray(/\uD83DA*/u.exec("\uD83DAA"),
|
||||
["\uD83DAA"]);
|
||||
|
||||
// ==== wrong patterns ====
|
||||
|
||||
assertThrowsInstanceOf(() => eval(`/\\u/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u0/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u00/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u000/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u000G/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\u0.00/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\uD83D\\u/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\uD83D\\u0/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\uD83D\\u00/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\uD83D\\u000/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\uD83D\\u000G/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/\\uD83D\\u0.00/u`), SyntaxError);
|
||||
|
||||
if (typeof reportCompare === "function")
|
||||
reportCompare(true, true);
|
139
js/src/tests/ecma_6/RegExp/unicode-raw.js
Normal file
139
js/src/tests/ecma_6/RegExp/unicode-raw.js
Normal file
@ -0,0 +1,139 @@
|
||||
// Regression test for bug 1135377: raw (unescaped) surrogate pairs in RegExp
// patterns, with and without the unicode ("u") flag.
// BUGNUMBER and summary are only used here to print a banner line.
var BUGNUMBER = 1135377;
|
||||
var summary = "Implement RegExp unicode flag -- raw unicode.";
|
||||
|
||||
print(BUGNUMBER + ": " + summary);
|
||||
|
||||
// ==== standalone ====
|
||||
|
||||
assertEqArray(eval(`/\uD83D\uDC38/u`).exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
|
||||
// no unicode flag
|
||||
assertEqArray(eval(`/\uD83D\uDC38/`).exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
|
||||
// escaped (lead)
|
||||
assertEq(eval(`/\\uD83D\uDC38/u`).exec("\u{1F438}"),
|
||||
null);
|
||||
assertEq(eval(`/\\u{D83D}\uDC38/u`).exec("\u{1F438}"),
|
||||
null);
|
||||
|
||||
// escaped (trail)
|
||||
assertEq(eval(`/\uD83D\\uDC38/u`).exec("\u{1F438}"),
|
||||
null);
|
||||
assertEq(eval(`/\uD83D\\u{DC38}/u`).exec("\u{1F438}"),
|
||||
null);
|
||||
|
||||
// escaped (lead), no unicode flag
|
||||
assertEqArray(eval(`/\\uD83D\uDC38/`).exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
|
||||
// escaped (trail), no unicode flag
|
||||
assertEqArray(eval(`/\uD83D\\uDC38/`).exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
|
||||
// ==== RegExp constructor ====
|
||||
|
||||
assertEqArray(new RegExp("\uD83D\uDC38", "u").exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
|
||||
// no unicode flag
|
||||
assertEqArray(new RegExp("\uD83D\uDC38", "").exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
|
||||
// escaped(lead)
|
||||
assertEq(new RegExp("\\uD83D\uDC38", "u").exec("\u{1F438}"),
|
||||
null);
|
||||
assertEq(new RegExp("\\u{D83D}\uDC38", "u").exec("\u{1F438}"),
|
||||
null);
|
||||
|
||||
// escaped(trail)
|
||||
assertEq(new RegExp("\uD83D\\uDC38", "u").exec("\u{1F438}"),
|
||||
null);
|
||||
assertEq(new RegExp("\uD83D\\u{DC38}", "u").exec("\u{1F438}"),
|
||||
null);
|
||||
|
||||
// escaped(lead), no unicode flag
|
||||
assertEqArray(new RegExp("\\uD83D\uDC38", "").exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
|
||||
// escaped(trail), no unicode flag
|
||||
assertEqArray(new RegExp("\uD83D\\uDC38", "").exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
|
||||
// ==== ? ====
|
||||
|
||||
assertEqArray(eval(`/\uD83D\uDC38?/u`).exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(eval(`/\uD83D\uDC38?/u`).exec(""),
|
||||
[""]);
|
||||
|
||||
assertEqArray(eval(`/\uD83D\uDC38?/u`).exec("\uD83D"),
|
||||
[""]);
|
||||
|
||||
// no unicode flag
|
||||
assertEqArray(eval(`/\uD83D\uDC38?/`).exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEq(eval(`/\uD83D\uDC38?/`).exec(""),
|
||||
null);
|
||||
|
||||
assertEqArray(eval(`/\uD83D\uDC38?/`).exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
|
||||
// escaped (lead)
|
||||
assertEq(eval(`/\\uD83D\uDC38?/u`).exec("\u{1F438}"),
|
||||
null);
|
||||
assertEq(eval(`/\\uD83D\uDC38?/u`).exec(""),
|
||||
null);
|
||||
|
||||
assertEqArray(eval(`/\\uD83D\uDC38?/u`).exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
|
||||
// escaped (trail)
|
||||
assertEq(eval(`/\uD83D\\uDC38?/u`).exec("\u{1F438}"),
|
||||
null);
|
||||
assertEq(eval(`/\uD83D\\uDC38?/u`).exec(""),
|
||||
null);
|
||||
|
||||
assertEqArray(eval(`/\uD83D\\uDC38?/u`).exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
|
||||
// escaped (lead), no unicode flag
|
||||
assertEqArray(eval(`/\\uD83D\uDC38?/`).exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEq(eval(`/\\uD83D\uDC38?/`).exec(""),
|
||||
null);
|
||||
|
||||
assertEqArray(eval(`/\\uD83D\uDC38?/`).exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
|
||||
// escaped (trail), no unicode flag
|
||||
assertEqArray(eval(`/\uD83D\\uDC38?/`).exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEq(eval(`/\uD83D\\uDC38?/`).exec(""),
|
||||
null);
|
||||
|
||||
assertEqArray(eval(`/\uD83D\\uDC38?/`).exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
|
||||
// ==== RegExp constructor, ? ====
|
||||
|
||||
assertEqArray(new RegExp("\uD83D\uDC38?", "u").exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(new RegExp("\uD83D\uDC38?", "u").exec(""),
|
||||
[""]);
|
||||
|
||||
assertEqArray(new RegExp("\uD83D\uDC38?", "u").exec("\uD83D"),
|
||||
[""]);
|
||||
|
||||
// no unicode flag
|
||||
assertEqArray(new RegExp("\uD83D\uDC38?", "").exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEq(new RegExp("\uD83D\uDC38?", "").exec(""),
|
||||
null);
|
||||
|
||||
assertEqArray(new RegExp("\uD83D\uDC38?", "").exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
|
||||
if (typeof reportCompare === "function")
|
||||
reportCompare(true, true);
|
@ -234,6 +234,44 @@ CanLowerCase(char16_t ch)
|
||||
return CharInfo(ch).lowerCase != 0;
|
||||
}
|
||||
|
||||
// UTF-16 lead (high) surrogate code unit range, inclusive.
const size_t LeadSurrogateMin = 0xD800;
const size_t LeadSurrogateMax = 0xDBFF;

// UTF-16 trail (low) surrogate code unit range, inclusive.
const size_t TrailSurrogateMin = 0xDC00;
const size_t TrailSurrogateMax = 0xDFFF;

// Largest value representable in a single UTF-16 code unit.
const size_t UTF16Max = 0xFFFF;

// Inclusive range of code points outside the Basic Multilingual Plane.
const size_t NonBMPMin = 0x10000;
const size_t NonBMPMax = 0x10FFFF;

// Returns true iff |value| lies in [LeadSurrogateMin, LeadSurrogateMax].
inline bool
IsLeadSurrogate(size_t value)
{
    const bool outsideRange = value < LeadSurrogateMin || value > LeadSurrogateMax;
    return !outsideRange;
}

// Returns true iff |value| lies in [TrailSurrogateMin, TrailSurrogateMax].
inline bool
IsTrailSurrogate(size_t value)
{
    const bool outsideRange = value < TrailSurrogateMin || value > TrailSurrogateMax;
    return !outsideRange;
}
|
||||
|
||||
inline void
|
||||
UTF16Encode(size_t cp, size_t* lead, size_t* trail)
|
||||
{
|
||||
MOZ_ASSERT(cp >= NonBMPMin && cp <= NonBMPMax);
|
||||
|
||||
*lead = (cp - NonBMPMin) / 1024 + LeadSurrogateMin;
|
||||
*trail = ((cp - NonBMPMin) % 1024) + TrailSurrogateMin;
|
||||
}
|
||||
|
||||
inline size_t
|
||||
UTF16Decode(size_t lead, size_t trail)
|
||||
{
|
||||
MOZ_ASSERT(IsLeadSurrogate(lead));
|
||||
MOZ_ASSERT(IsTrailSurrogate(trail));
|
||||
|
||||
return (lead - LeadSurrogateMin) * 1024 + (trail - TrailSurrogateMin) + NonBMPMin;
|
||||
}
|
||||
|
||||
} /* namespace unicode */
|
||||
} /* namespace js */
|
||||
|
||||
|
@ -29,11 +29,11 @@ namespace js {
|
||||
*
|
||||
* https://developer.mozilla.org/en-US/docs/SpiderMonkey/Internals/Bytecode
|
||||
*/
|
||||
static const uint32_t XDR_BYTECODE_VERSION_SUBTRAHEND = 332;
|
||||
static const uint32_t XDR_BYTECODE_VERSION_SUBTRAHEND = 333;
|
||||
static const uint32_t XDR_BYTECODE_VERSION =
|
||||
uint32_t(0xb973c0de - XDR_BYTECODE_VERSION_SUBTRAHEND);
|
||||
|
||||
static_assert(JSErr_Limit == 425,
|
||||
static_assert(JSErr_Limit == 428,
|
||||
"GREETINGS, POTENTIAL SUBTRAHEND INCREMENTER! If you added or "
|
||||
"removed MSG_DEFs from js.msg, you should increment "
|
||||
"XDR_BYTECODE_VERSION_SUBTRAHEND and update this assertion's "
|
||||
|
Loading…
x
Reference in New Issue
Block a user