#393, Bug 1135377 - Part 2: Parse RegExp unicode character in non-CharacterClass. r=till, f=anba

This commit is contained in:
Tooru Fujisawa 2015-08-07 08:11:07 +09:00 committed by Cameron Kaiser
parent d00063089b
commit c85a176bd8
11 changed files with 787 additions and 4 deletions

View File

@ -138,7 +138,8 @@ class RegExpAssertion : public RegExpTree {
END_OF_LINE,
END_OF_INPUT,
BOUNDARY,
NON_BOUNDARY
NON_BOUNDARY,
NOT_AFTER_LEAD_SURROGATE
};
explicit RegExpAssertion(AssertionType type) : assertion_type_(type) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);

View File

@ -2061,6 +2061,8 @@ RegExpAssertion::ToNode(RegExpCompiler* compiler,
result->AddAlternative(end_alternative);
return result;
}
case NOT_AFTER_LEAD_SURROGATE:
return AssertionNode::NotAfterLeadSurrogate(on_success);
default:
MOZ_CRASH("Bad assertion type");
}
@ -2848,6 +2850,31 @@ EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace)
on_success->Emit(compiler, &new_trace);
}
// Emit code for the NOT_AFTER_LEAD_SURROGATE assertion: the character
// immediately before the current position must not lie in the lead surrogate
// range. When the check passes (or there is no previous character because we
// are at the start of input), matching continues with |on_success|;
// otherwise the generated code backtracks.
static void
EmitNotAfterLeadSurrogate(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace)
{
    RegExpMacroAssembler* masm = compiler->macro_assembler();

    // The previous character is about to be loaded into the current
    // character register, so work on a copy of the trace whose cached
    // current character has been invalidated.
    Trace innerTrace(*trace);
    innerTrace.InvalidateCurrentCharacter();

    jit::Label assertionHolds;
    if (innerTrace.cp_offset() == 0) {
        // At the very start of input there is no previous character, so the
        // assertion holds trivially.
        masm->CheckAtStart(&assertionHolds);
    }

    // Past this point we are known not to be at the start of input, so the
    // previous character can be loaded without a bounds check (the trailing
    // |false| argument).
    masm->LoadCurrentCharacter(innerTrace.cp_offset() - 1, innerTrace.backtrack(), false);

    // Backtrack if the previous character is a lead surrogate.
    masm->CheckCharacterInRange(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax,
                                innerTrace.backtrack());

    masm->Bind(&assertionHolds);
    on_success->Emit(compiler, &innerTrace);
}
// Check for [0-9A-Z_a-z].
static void
EmitWordCheck(RegExpMacroAssembler* assembler,
@ -3001,6 +3028,9 @@ AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace)
EmitBoundaryCheck(compiler, trace);
return;
}
case NOT_AFTER_LEAD_SURROGATE:
EmitNotAfterLeadSurrogate(compiler, on_success(), trace);
return;
}
on_success()->Emit(compiler, trace);
}

View File

@ -788,7 +788,8 @@ class AssertionNode : public SeqRegExpNode
AT_START,
AT_BOUNDARY,
AT_NON_BOUNDARY,
AFTER_NEWLINE
AFTER_NEWLINE,
NOT_AFTER_LEAD_SURROGATE
};
AssertionNode(AssertionType t, RegExpNode* on_success)
: SeqRegExpNode(on_success), assertion_type_(t)
@ -809,6 +810,10 @@ class AssertionNode : public SeqRegExpNode
static AssertionNode* AfterNewline(RegExpNode* on_success) {
return on_success->alloc()->newInfallible<AssertionNode>(AFTER_NEWLINE, on_success);
}
// Factory for an assertion node of type NOT_AFTER_LEAD_SURROGATE that
// continues with |on_success|. The node is allocated infallibly from the
// allocator of |on_success|.
static AssertionNode* NotAfterLeadSurrogate(RegExpNode* on_success) {
    AssertionNode* node =
        on_success->alloc()->newInfallible<AssertionNode>(NOT_AFTER_LEAD_SURROGATE, on_success);
    return node;
}
virtual void Accept(NodeVisitor* visitor);
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);

View File

@ -302,6 +302,108 @@ RegExpParser<CharT>::ParseHexEscape(int length, size_t* value)
return true;
}
// Parse the braced part of a \u{...} escape. On entry the current character
// must be '{'. Any number of hex digits is accepted (leading zeros included)
// as long as the accumulated value never exceeds unicode::NonBMPMax.
//
// On success the closing '}' has been consumed, the code point is stored in
// |*value| and true is returned. On failure an error has been reported
// (JSMSG_INVALID_UNICODE_ESCAPE for end of input, an empty "{}" or a non-hex
// character; JSMSG_UNICODE_OVERFLOW for a value that is too large), |*value|
// is left untouched and false is returned.
template <typename CharT>
bool
RegExpParser<CharT>::ParseBracedHexEscape(size_t* value)
{
    MOZ_ASSERT(current() == '{');
    Advance();

    uint32_t codePoint = 0;
    bool sawDigit = false;
    for (;;) {
        widechar ch = current();

        // Pattern ended before the closing brace.
        if (ch == kEndMarker) {
            ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
            return false;
        }

        if (ch == '}') {
            // "\u{}" carries no digits and is invalid.
            if (!sawDigit) {
                ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
                return false;
            }
            Advance();
            *value = codePoint;
            return true;
        }

        int digit = HexValue(ch);
        if (digit < 0) {
            ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
            return false;
        }

        // Checking the range after every digit keeps the accumulator small
        // enough that the next shift cannot overflow, however many digits
        // the pattern contains.
        codePoint = (codePoint << 4) | digit;
        if (codePoint > unicode::NonBMPMax) {
            ReportError(JSMSG_UNICODE_OVERFLOW);
            return false;
        }

        Advance();
        sawDigit = true;
    }
}
// Try to parse a "\uXXXX" escape whose value is a trail surrogate, starting
// at the current position. On success the escape is consumed, the value is
// stored in |*value| and true is returned. Otherwise false is returned with
// the parse position unchanged (restored if anything had been consumed);
// |*value| may have been overwritten by the attempt.
template <typename CharT>
bool
RegExpParser<CharT>::ParseTrailSurrogate(size_t* value)
{
    if (current() != '\\')
        return false;

    // Remember where the escape began so a failed attempt can be undone.
    const CharT* rewindTo = position();
    Advance();

    if (current() == 'u') {
        Advance();
        if (ParseHexEscape(4, value) && unicode::IsTrailSurrogate(*value))
            return true;
    }

    Reset(rewindTo);
    return false;
}
// Try to consume a literal (unescaped) surrogate pair at the current
// position. If the current character is a lead surrogate and the one after
// it is a trail surrogate, both are consumed, stored in |*lead| / |*trail|,
// and true is returned. Otherwise nothing is consumed, the outputs are left
// untouched and false is returned.
template <typename CharT>
bool
RegExpParser<CharT>::ParseRawSurrogatePair(char16_t* lead, char16_t* trail)
{
    widechar first = current();
    if (!unicode::IsLeadSurrogate(first))
        return false;

    // Saved so the lead surrogate can be un-consumed if no trail follows.
    const CharT* rewindTo = position();
    Advance();

    widechar second = current();
    if (unicode::IsTrailSurrogate(second)) {
        Advance();
        *lead = first;
        *trail = second;
        return true;
    }

    Reset(rewindTo);
    return false;
}
// Build a character class tree containing the single range [from, to].
// Both the range vector and the class node are allocated from |alloc|.
static inline RegExpTree*
RangeAtom(LifoAlloc* alloc, char16_t from, char16_t to)
{
    CharacterRangeVector* singleRange = alloc->newInfallible<CharacterRangeVector>(*alloc);
    singleRange->append(CharacterRange::Range(from, to));

    RegExpTree* charClass = alloc->newInfallible<RegExpCharacterClass>(singleRange, false);
    return charClass;
}
// Build a lookahead tree whose body is the character range [from, to],
// passing |false| as the second RegExpLookahead argument and zeros for the
// remaining two. NOTE(review): going by this helper's name the |false|
// selects a negative lookahead; confirm against the RegExpLookahead
// constructor.
static inline RegExpTree*
NegativeLookahead(LifoAlloc* alloc, char16_t from, char16_t to)
{
    RegExpTree* body = RangeAtom(alloc, from, to);
    return alloc->newInfallible<RegExpLookahead>(body, false, 0, 0);
}
#ifdef DEBUG
// Currently only used in an assertion (debug builds only).
static bool
@ -675,6 +777,35 @@ RegExpParser<CharT>::ParsePattern()
return result;
}
// Build a tree that matches the two code units |lead| and |trail| in
// sequence, i.e. one complete surrogate pair.
static inline RegExpTree*
SurrogatePairAtom(LifoAlloc* alloc, char16_t lead, char16_t trail)
{
    RegExpBuilder* pairBuilder = alloc->newInfallible<RegExpBuilder>(alloc);

    pairBuilder->AddCharacter(lead);
    pairBuilder->AddCharacter(trail);

    RegExpTree* pairTree = pairBuilder->ToRegExp();
    return pairTree;
}
// Build a tree for a lone lead surrogate: the code unit |value| followed by
// a lookahead over the trail surrogate range, so that the lead half of a
// real surrogate pair in the input is not matched on its own.
static inline RegExpTree*
LeadSurrogateAtom(LifoAlloc* alloc, char16_t value)
{
    RegExpBuilder* loneLead = alloc->newInfallible<RegExpBuilder>(alloc);
    loneLead->AddCharacter(value);

    RegExpTree* notBeforeTrail = NegativeLookahead(alloc, unicode::TrailSurrogateMin,
                                                   unicode::TrailSurrogateMax);
    loneLead->AddAtom(notBeforeTrail);

    return loneLead->ToRegExp();
}
// Build a tree for a lone trail surrogate: a NOT_AFTER_LEAD_SURROGATE
// assertion followed by the code unit |value|, so that the trail half of a
// real surrogate pair in the input is not matched on its own.
static inline RegExpTree*
TrailSurrogateAtom(LifoAlloc* alloc, char16_t value)
{
    RegExpBuilder* loneTrail = alloc->newInfallible<RegExpBuilder>(alloc);

    RegExpAssertion* notAfterLead =
        alloc->newInfallible<RegExpAssertion>(RegExpAssertion::NOT_AFTER_LEAD_SURROGATE);
    loneTrail->AddAssertion(notAfterLead);
    loneTrail->AddCharacter(value);

    return loneTrail->ToRegExp();
}
// Disjunction ::
// Alternative
// Alternative | Disjunction
@ -929,6 +1060,38 @@ RegExpParser<CharT>::ParseDisjunction()
case 'u': {
Advance(2);
size_t value;
if (unicode_) {
if (current() == '{') {
if (!ParseBracedHexEscape(&value))
return nullptr;
if (unicode::IsLeadSurrogate(value)) {
builder->AddAtom(LeadSurrogateAtom(alloc, value));
} else if (unicode::IsTrailSurrogate(value)) {
builder->AddAtom(TrailSurrogateAtom(alloc, value));
} else if (value >= unicode::NonBMPMin) {
size_t lead, trail;
unicode::UTF16Encode(value, &lead, &trail);
builder->AddAtom(SurrogatePairAtom(alloc, lead, trail));
} else {
builder->AddCharacter(value);
}
} else if (ParseHexEscape(4, &value)) {
if (unicode::IsLeadSurrogate(value)) {
size_t trail;
if (ParseTrailSurrogate(&trail))
builder->AddAtom(SurrogatePairAtom(alloc, value, trail));
else
builder->AddAtom(LeadSurrogateAtom(alloc, value));
} else if (unicode::IsTrailSurrogate(value)) {
builder->AddAtom(TrailSurrogateAtom(alloc, value));
} else {
builder->AddCharacter(value);
}
} else {
return ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
}
break;
}
if (ParseHexEscape(4, &value)) {
builder->AddCharacter(value);
} else {
@ -950,6 +1113,22 @@ RegExpParser<CharT>::ParseDisjunction()
// fallthrough
}
default:
if (unicode_) {
char16_t lead, trail;
if (ParseRawSurrogatePair(&lead, &trail)) {
builder->AddAtom(SurrogatePairAtom(alloc, lead, trail));
} else {
widechar c = current();
if (unicode::IsLeadSurrogate(c))
builder->AddAtom(LeadSurrogateAtom(alloc, c));
else if (unicode::IsTrailSurrogate(c))
builder->AddAtom(TrailSurrogateAtom(alloc, c));
else
builder->AddCharacter(c);
Advance();
}
break;
}
builder->AddCharacter(current());
Advance();
break;

View File

@ -193,6 +193,10 @@ class RegExpParser
// and sets the value if it is.
bool ParseHexEscape(int length, size_t* value);
bool ParseBracedHexEscape(size_t* value);
bool ParseTrailSurrogate(size_t* value);
bool ParseRawSurrogatePair(char16_t* lead, char16_t* trail);
size_t ParseOctalLiteral();
// Tries to parse the input as a back reference. If successful it

View File

@ -450,11 +450,14 @@ MSG_DEF(JSMSG_UNDEFINED_CURRENCY, 0, JSEXN_TYPEERR, "undefined currency in
MSG_DEF(JSMSG_BAD_CLASS_RANGE, 0, JSEXN_SYNTAXERR, "invalid range in character class")
MSG_DEF(JSMSG_ESCAPE_AT_END_OF_REGEXP, 0, JSEXN_SYNTAXERR, "\\ at end of pattern")
MSG_DEF(JSMSG_INVALID_GROUP, 0, JSEXN_SYNTAXERR, "invalid regexp group")
MSG_DEF(JSMSG_INVALID_IDENTITY_ESCAPE, 0, JSEXN_SYNTAXERR, "invalid identity escape in regular expression")
MSG_DEF(JSMSG_INVALID_UNICODE_ESCAPE, 0, JSEXN_SYNTAXERR, "invalid unicode escape in regular expression")
MSG_DEF(JSMSG_MISSING_PAREN, 0, JSEXN_SYNTAXERR, "unterminated parenthetical")
MSG_DEF(JSMSG_NEWREGEXP_FLAGGED, 0, JSEXN_TYPEERR, "can't supply flags when constructing one RegExp from another")
MSG_DEF(JSMSG_NOTHING_TO_REPEAT, 0, JSEXN_SYNTAXERR, "nothing to repeat")
MSG_DEF(JSMSG_NUMBERS_OUT_OF_ORDER, 0, JSEXN_SYNTAXERR, "numbers out of order in {} quantifier.")
MSG_DEF(JSMSG_TOO_MANY_PARENS, 0, JSEXN_INTERNALERR, "too many parentheses in regular expression")
MSG_DEF(JSMSG_UNICODE_OVERFLOW, 0, JSEXN_SYNTAXERR, "unicode codepoint should not be greater than 0x10FFFF in regular expression")
MSG_DEF(JSMSG_UNMATCHED_RIGHT_PAREN, 0, JSEXN_SYNTAXERR, "unmatched ) in regular expression")
MSG_DEF(JSMSG_UNTERM_CLASS, 0, JSEXN_SYNTAXERR, "unterminated character class")

View File

@ -0,0 +1,166 @@
var BUGNUMBER = 1135377;
var summary = "Implement RegExp unicode flag -- braced pattern in RegExpUnicodeEscapeSequence.";
print(BUGNUMBER + ": " + summary);
// ==== standalone ====
assertEqArray(/\u{41}/u.exec("ABC"),
["A"]);
assertEqArray(/\u{41}/.exec("ABC" + "u".repeat(41)),
["u".repeat(41)]);
assertEqArray(/\u{4A}/u.exec("JKL"),
["J"]);
assertEqArray(/\u{4A}/.exec("JKLu{4A}"),
["u{4A}"]);
assertEqArray(/\u{1F438}/u.exec("\u{1F438}"),
["\u{1F438}"]);
assertEqArray(/\u{1F438}/.exec("u{1F438}"),
["u{1F438}"]);
assertEqArray(/\u{0}/u.exec("\u{0}"),
["\u{0}"]);
assertEqArray(/\u{10FFFF}/u.exec("\u{10FFFF}"),
["\u{10FFFF}"]);
assertEqArray(/\u{10ffff}/u.exec("\u{10FFFF}"),
["\u{10FFFF}"]);
// leading 0
assertEqArray(/\u{0000000000000000000000}/u.exec("\u{0}"),
["\u{0}"]);
assertEqArray(/\u{000000000000000010FFFF}/u.exec("\u{10FFFF}"),
["\u{10FFFF}"]);
// RegExp constructor
assertEqArray(new RegExp("\\u{0}", "u").exec("\u{0}"),
["\u{0}"]);
assertEqArray(new RegExp("\\u{41}", "u").exec("ABC"),
["A"]);
assertEqArray(new RegExp("\\u{1F438}", "u").exec("\u{1F438}"),
["\u{1F438}"]);
assertEqArray(new RegExp("\\u{10FFFF}", "u").exec("\u{10FFFF}"),
["\u{10FFFF}"]);
assertEqArray(new RegExp("\\u{0000000000000000}", "u").exec("\u{0}"),
["\u{0}"]);
assertEqArray(eval(`/\\u{${"0".repeat(Math.pow(2, 24)) + "1234"}}/u`).exec("\u{1234}"),
["\u{1234}"]);
assertEqArray(new RegExp(`\\u{${"0".repeat(Math.pow(2, 24)) + "1234"}}`, "u").exec("\u{1234}"),
["\u{1234}"]);
// ==== ? ====
assertEqArray(/\u{1F438}?/u.exec("\u{1F438}"),
["\u{1F438}"]);
assertEqArray(/\u{1F438}?/u.exec(""),
[""]);
// lead-only target
assertEqArray(/\u{1F438}?/u.exec("\uD83D"),
[""]);
// RegExp constructor
assertEqArray(new RegExp("\\u{1F438}?", "u").exec("\u{1F438}"),
["\u{1F438}"]);
assertEqArray(new RegExp("\\u{1F438}?", "u").exec(""),
[""]);
assertEqArray(new RegExp("\\u{1F438}?", "u").exec("\uD83D"),
[""]);
// ==== + ====
assertEqArray(/\u{1F438}+/u.exec("\u{1F438}"),
["\u{1F438}"]);
assertEqArray(/\u{1F438}+/u.exec("\u{1F438}\u{1F438}"),
["\u{1F438}\u{1F438}"]);
assertEq(/\u{1F438}+/u.exec(""),
null);
// lead-only target
assertEq(/\u{1F438}+/u.exec("\uD83D"),
null);
assertEqArray(/\u{1F438}+/u.exec("\uD83D\uDC38\uDC38"),
["\uD83D\uDC38"]);
// ==== * ====
assertEqArray(/\u{1F438}*/u.exec("\u{1F438}"),
["\u{1F438}"]);
assertEqArray(/\u{1F438}*/u.exec("\u{1F438}\u{1F438}"),
["\u{1F438}\u{1F438}"]);
assertEqArray(/\u{1F438}*/u.exec(""),
[""]);
// lead-only target
assertEqArray(/\u{1F438}*/u.exec("\uD83D"),
[""]);
assertEqArray(/\u{1F438}*/u.exec("\uD83D\uDC38\uDC38"),
["\uD83D\uDC38"]);
// ==== lead-only ====
// With the u flag, a lone lead surrogate in the pattern must not match the
// lead half of a surrogate pair in the input: it matches only when the next
// code unit lies outside the trail surrogate range U+DC00..U+DFFF. The four
// cases below probe both edges of that range (DBFF, DC00, DFFF, E000).
assertEqArray(/\u{D83D}/u.exec("\uD83D\uDBFF"),
["\uD83D"]);
assertEq(/\u{D83D}/u.exec("\uD83D\uDC00"),
null);
assertEq(/\u{D83D}/u.exec("\uD83D\uDFFF"),
null);
assertEqArray(/\u{D83D}/u.exec("\uD83D\uE000"),
["\uD83D"]);
// The lone lead surrogate also matches at the end of input and before an
// ordinary (non-trail) character.
assertEqArray(/\u{D83D}/u.exec("\uD83D"),
["\uD83D"]);
assertEqArray(/\u{D83D}/u.exec("\uD83DA"),
["\uD83D"]);
// ==== trail-only ====
// With the u flag, a lone trail surrogate in the pattern must not match the
// trail half of a surrogate pair in the input: it matches only when the
// previous code unit lies outside the lead surrogate range U+D800..U+DBFF.
// The four cases below probe both edges of that range (D7FF, D800, DBFF,
// DC00).
assertEqArray(/\u{DC38}/u.exec("\uD7FF\uDC38"),
["\uDC38"]);
assertEq(/\u{DC38}/u.exec("\uD800\uDC38"),
null);
assertEq(/\u{DC38}/u.exec("\uDBFF\uDC38"),
null);
assertEqArray(/\u{DC38}/u.exec("\uDC00\uDC38"),
["\uDC38"]);
// The lone trail surrogate also matches at the start of input and after an
// ordinary (non-lead) character.
assertEqArray(/\u{DC38}/u.exec("\uDC38"),
["\uDC38"]);
assertEqArray(/\u{DC38}/u.exec("A\uDC38"),
["\uDC38"]);
// ==== wrong patterns ====
assertThrowsInstanceOf(() => eval(`/\\u{-1}/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u{0.0}/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u{G}/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u{}/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u{{/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u{/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u{110000}/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u{00110000}/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u{100000000000000000000000000000}/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u{ FFFF}/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u{FFFF }/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u{FF FF}/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u{F F F F}/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u{100000001}/u`), SyntaxError);
// Surrogate halves written with braced escapes.
// A braced escape never combines with a neighbouring escape to form a
// surrogate pair: each half is treated as a lone surrogate. A lone lead
// cannot match the lead of a real pair and a lone trail cannot match the
// trail of a real pair, so none of these patterns matches the input.
assertEq(/\u{D83D}\u{DC38}+/u.exec("\uD83D\uDC38\uDC38"),
null);
assertEq(/\uD83D\u{DC38}+/u.exec("\uD83D\uDC38\uDC38"),
null);
assertEq(/\u{D83D}\uDC38+/u.exec("\uD83D\uDC38\uDC38"),
null);
if (typeof reportCompare === "function")
reportCompare(true, true);

View File

@ -0,0 +1,218 @@
var BUGNUMBER = 1135377;
var summary = "Implement RegExp unicode flag -- lead and trail patterns in RegExpUnicodeEscapeSequence.";
print(BUGNUMBER + ": " + summary);
// ==== standalone ====
assertEqArray(/\uD83D\uDC38/u.exec("\u{1F438}"),
["\u{1F438}"]);
// no unicode flag
assertEqArray(/\uD83D\uDC38/.exec("\u{1F438}"),
["\u{1F438}"]);
// RegExp constructor
assertEqArray(new RegExp("\\uD83D\\uDC38", "u").exec("\u{1F438}"),
["\u{1F438}"]);
// RegExp constructor, no unicode flag
assertEqArray(new RegExp("\\uD83D\\uDC38", "").exec("\u{1F438}"),
["\u{1F438}"]);
// ==== ? ====
assertEqArray(/\uD83D\uDC38?/u.exec("\u{1F438}"),
["\u{1F438}"]);
assertEqArray(/\uD83D\uDC38?/u.exec(""),
[""]);
// lead-only target
assertEqArray(/\uD83D\uDC38?/u.exec("\uD83D"),
[""]);
// no unicode flag
assertEqArray(/\uD83D\uDC38?/.exec("\u{1F438}"),
["\u{1F438}"]);
assertEq(/\uD83D\uDC38?/.exec(""),
null);
assertEqArray(/\uD83D\uDC38?/.exec("\uD83D"),
["\uD83D"]);
// RegExp constructor
assertEqArray(new RegExp("\\uD83D\\uDC38?", "u").exec("\u{1F438}"),
["\u{1F438}"]);
assertEqArray(new RegExp("\\uD83D\\uDC38?", "u").exec(""),
[""]);
assertEqArray(new RegExp("\\uD83D\\uDC38?", "u").exec("\uD83D"),
[""]);
// RegExp constructor, no unicode flag
assertEqArray(new RegExp("\\uD83D\\uDC38?", "").exec("\u{1F438}"),
["\u{1F438}"]);
assertEq(new RegExp("\\uD83D\\uDC38?", "").exec(""),
null);
assertEqArray(new RegExp("\\uD83D\\uDC38?", "").exec("\uD83D"),
["\uD83D"]);
// ==== + ====
assertEqArray(/\uD83D\uDC38+/u.exec("\u{1F438}"),
["\u{1F438}"]);
assertEqArray(/\uD83D\uDC38+/u.exec("\u{1F438}\u{1F438}"),
["\u{1F438}\u{1F438}"]);
assertEq(/\uD83D\uDC38+/u.exec(""),
null);
// lead-only target
assertEq(/\uD83D\uDC38+/u.exec("\uD83D"),
null);
assertEqArray(/\uD83D\uDC38+/u.exec("\uD83D\uDC38\uDC38"),
["\uD83D\uDC38"]);
// no unicode flag
assertEqArray(/\uD83D\uDC38+/.exec("\u{1F438}"),
["\u{1F438}"]);
assertEqArray(/\uD83D\uDC38+/.exec("\u{1F438}\u{1F438}"),
["\u{1F438}"]);
assertEq(/\uD83D\uDC38+/.exec("\uD83D"),
null);
assertEqArray(/\uD83D\uDC38+/.exec("\uD83D\uDC38\uDC38"),
["\uD83D\uDC38\uDC38"]);
assertEq(/\uD83D\uDC38+/.exec(""),
null);
// ==== * ====
assertEqArray(/\uD83D\uDC38*/u.exec("\u{1F438}"),
["\u{1F438}"]);
assertEqArray(/\uD83D\uDC38*/u.exec("\u{1F438}\u{1F438}"),
["\u{1F438}\u{1F438}"]);
assertEqArray(/\uD83D\uDC38*/u.exec(""),
[""]);
// lead-only target
assertEqArray(/\uD83D\uDC38*/u.exec("\uD83D"),
[""]);
assertEqArray(/\uD83D\uDC38*/u.exec("\uD83D\uDC38\uDC38"),
["\uD83D\uDC38"]);
// no unicode flag
assertEqArray(/\uD83D\uDC38*/.exec("\u{1F438}"),
["\u{1F438}"]);
assertEqArray(/\uD83D\uDC38*/.exec("\u{1F438}\u{1F438}"),
["\u{1F438}"]);
assertEqArray(/\uD83D\uDC38*/.exec("\uD83D"),
["\uD83D"]);
assertEqArray(/\uD83D\uDC38*/.exec("\uD83D\uDC38\uDC38"),
["\uD83D\uDC38\uDC38"]);
assertEq(/\uD83D\uDC38*/.exec(""),
null);
// ==== lead-only ====
// match only non-surrogate pair
assertEqArray(/\uD83D/u.exec("\uD83D\uDBFF"),
["\uD83D"]);
assertEq(/\uD83D/u.exec("\uD83D\uDC00"),
null);
assertEq(/\uD83D/u.exec("\uD83D\uDFFF"),
null);
assertEqArray(/\uD83D/u.exec("\uD83D\uE000"),
["\uD83D"]);
// match before non-tail char
assertEqArray(/\uD83D/u.exec("\uD83D"),
["\uD83D"]);
assertEqArray(/\uD83D/u.exec("\uD83DA"),
["\uD83D"]);
// no unicode flag
assertEqArray(/\uD83D/.exec("\uD83D\uDBFF"),
["\uD83D"]);
assertEqArray(/\uD83D/.exec("\uD83D\uDC00"),
["\uD83D"]);
assertEqArray(/\uD83D/.exec("\uD83D\uDFFF"),
["\uD83D"]);
assertEqArray(/\uD83D/.exec("\uD83D\uE000"),
["\uD83D"]);
assertEqArray(/\uD83D/.exec("\uD83D"),
["\uD83D"]);
assertEqArray(/\uD83D/.exec("\uD83DA"),
["\uD83D"]);
// ==== trail-only ====
// match only non-surrogate pair
assertEqArray(/\uDC38/u.exec("\uD7FF\uDC38"),
["\uDC38"]);
assertEq(/\uDC38/u.exec("\uD800\uDC38"),
null);
assertEq(/\uDC38/u.exec("\uDBFF\uDC38"),
null);
assertEqArray(/\uDC38/u.exec("\uDC00\uDC38"),
["\uDC38"]);
// match after non-lead char
assertEqArray(/\uDC38/u.exec("\uDC38"),
["\uDC38"]);
assertEqArray(/\uDC38/u.exec("A\uDC38"),
["\uDC38"]);
// no unicode flag
assertEqArray(/\uDC38/.exec("\uD7FF\uDC38"),
["\uDC38"]);
assertEqArray(/\uDC38/.exec("\uD800\uDC38"),
["\uDC38"]);
assertEqArray(/\uDC38/.exec("\uDBFF\uDC38"),
["\uDC38"]);
assertEqArray(/\uDC38/.exec("\uDC00\uDC38"),
["\uDC38"]);
assertEqArray(/\uDC38/.exec("\uDC38"),
["\uDC38"]);
assertEqArray(/\uDC38/.exec("A\uDC38"),
["\uDC38"]);
// ==== invalid trail ====
// A lead surrogate escape followed by something that is not a trail
// surrogate escape stays a lone lead surrogate. The following atom (a
// four-digit escape, a braced escape, or a plain character) is a separate
// atom, and the * quantifier applies to that atom only.
assertEqArray(/\uD83D\u3042*/u.exec("\uD83D"),
["\uD83D"]);
assertEqArray(/\uD83D\u3042*/u.exec("\uD83D\u3042"),
["\uD83D\u3042"]);
assertEqArray(/\uD83D\u3042*/u.exec("\uD83D\u3042\u3042"),
["\uD83D\u3042\u3042"]);
assertEqArray(/\uD83D\u{3042}*/u.exec("\uD83D"),
["\uD83D"]);
assertEqArray(/\uD83D\u{3042}*/u.exec("\uD83D\u3042"),
["\uD83D\u3042"]);
assertEqArray(/\uD83D\u{3042}*/u.exec("\uD83D\u3042\u3042"),
["\uD83D\u3042\u3042"]);
assertEqArray(/\uD83DA*/u.exec("\uD83D"),
["\uD83D"]);
assertEqArray(/\uD83DA*/u.exec("\uD83DA"),
["\uD83DA"]);
assertEqArray(/\uD83DA*/u.exec("\uD83DAA"),
["\uD83DAA"]);
// ==== wrong patterns ====
assertThrowsInstanceOf(() => eval(`/\\u/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u0/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u00/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u000/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u000G/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\u0.00/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\uD83D\\u/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\uD83D\\u0/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\uD83D\\u00/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\uD83D\\u000/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\uD83D\\u000G/u`), SyntaxError);
assertThrowsInstanceOf(() => eval(`/\\uD83D\\u0.00/u`), SyntaxError);
if (typeof reportCompare === "function")
reportCompare(true, true);

View File

@ -0,0 +1,139 @@
var BUGNUMBER = 1135377;
var summary = "Implement RegExp unicode flag -- raw unicode.";
print(BUGNUMBER + ": " + summary);
// ==== standalone ====
assertEqArray(eval(`/\uD83D\uDC38/u`).exec("\u{1F438}"),
["\u{1F438}"]);
// no unicode flag
assertEqArray(eval(`/\uD83D\uDC38/`).exec("\u{1F438}"),
["\u{1F438}"]);
// Escaped lead followed by a raw trail. In the template literal, "\\u"
// reaches the regexp source as an escape sequence while "\uDC38" reaches it
// as a raw code unit. With the u flag an escaped half and a raw half do not
// combine into a surrogate pair, so the pattern cannot match the real pair.
assertEq(eval(`/\\uD83D\uDC38/u`).exec("\u{1F438}"),
null);
assertEq(eval(`/\\u{D83D}\uDC38/u`).exec("\u{1F438}"),
null);
// Raw lead followed by an escaped trail: likewise two lone surrogates.
assertEq(eval(`/\uD83D\\uDC38/u`).exec("\u{1F438}"),
null);
assertEq(eval(`/\uD83D\\u{DC38}/u`).exec("\u{1F438}"),
null);
// escaped (lead), no unicode flag
assertEqArray(eval(`/\\uD83D\uDC38/`).exec("\u{1F438}"),
["\u{1F438}"]);
// escaped (trail), no unicode flag
assertEqArray(eval(`/\uD83D\\uDC38/`).exec("\u{1F438}"),
["\u{1F438}"]);
// ==== RegExp constructor ====
assertEqArray(new RegExp("\uD83D\uDC38", "u").exec("\u{1F438}"),
["\u{1F438}"]);
// no unicode flag
assertEqArray(new RegExp("\uD83D\uDC38", "").exec("\u{1F438}"),
["\u{1F438}"]);
// escaped(lead)
assertEq(new RegExp("\\uD83D\uDC38", "u").exec("\u{1F438}"),
null);
assertEq(new RegExp("\\u{D83D}\uDC38", "u").exec("\u{1F438}"),
null);
// escaped(trail)
assertEq(new RegExp("\uD83D\\uDC38", "u").exec("\u{1F438}"),
null);
assertEq(new RegExp("\uD83D\\u{DC38}", "u").exec("\u{1F438}"),
null);
// escaped(lead), no unicode flag
assertEqArray(new RegExp("\\uD83D\uDC38", "").exec("\u{1F438}"),
["\u{1F438}"]);
// escaped(trail), no unicode flag
assertEqArray(new RegExp("\uD83D\\uDC38", "").exec("\u{1F438}"),
["\u{1F438}"]);
// ==== ? ====
assertEqArray(eval(`/\uD83D\uDC38?/u`).exec("\u{1F438}"),
["\u{1F438}"]);
assertEqArray(eval(`/\uD83D\uDC38?/u`).exec(""),
[""]);
assertEqArray(eval(`/\uD83D\uDC38?/u`).exec("\uD83D"),
[""]);
// no unicode flag
assertEqArray(eval(`/\uD83D\uDC38?/`).exec("\u{1F438}"),
["\u{1F438}"]);
assertEq(eval(`/\uD83D\uDC38?/`).exec(""),
null);
assertEqArray(eval(`/\uD83D\uDC38?/`).exec("\uD83D"),
["\uD83D"]);
// escaped (lead)
assertEq(eval(`/\\uD83D\uDC38?/u`).exec("\u{1F438}"),
null);
assertEq(eval(`/\\uD83D\uDC38?/u`).exec(""),
null);
assertEqArray(eval(`/\\uD83D\uDC38?/u`).exec("\uD83D"),
["\uD83D"]);
// escaped (trail)
assertEq(eval(`/\uD83D\\uDC38?/u`).exec("\u{1F438}"),
null);
assertEq(eval(`/\uD83D\\uDC38?/u`).exec(""),
null);
assertEqArray(eval(`/\uD83D\\uDC38?/u`).exec("\uD83D"),
["\uD83D"]);
// escaped (lead), no unicode flag
assertEqArray(eval(`/\\uD83D\uDC38?/`).exec("\u{1F438}"),
["\u{1F438}"]);
assertEq(eval(`/\\uD83D\uDC38?/`).exec(""),
null);
assertEqArray(eval(`/\\uD83D\uDC38?/`).exec("\uD83D"),
["\uD83D"]);
// escaped (trail), no unicode flag
assertEqArray(eval(`/\uD83D\\uDC38?/`).exec("\u{1F438}"),
["\u{1F438}"]);
assertEq(eval(`/\uD83D\\uDC38?/`).exec(""),
null);
assertEqArray(eval(`/\uD83D\\uDC38?/`).exec("\uD83D"),
["\uD83D"]);
// ==== RegExp constructor, ? ====
assertEqArray(new RegExp("\uD83D\uDC38?", "u").exec("\u{1F438}"),
["\u{1F438}"]);
assertEqArray(new RegExp("\uD83D\uDC38?", "u").exec(""),
[""]);
assertEqArray(new RegExp("\uD83D\uDC38?", "u").exec("\uD83D"),
[""]);
// no unicode flag
assertEqArray(new RegExp("\uD83D\uDC38?", "").exec("\u{1F438}"),
["\u{1F438}"]);
assertEq(new RegExp("\uD83D\uDC38?", "").exec(""),
null);
assertEqArray(new RegExp("\uD83D\uDC38?", "").exec("\uD83D"),
["\uD83D"]);
if (typeof reportCompare === "function")
reportCompare(true, true);

View File

@ -234,6 +234,44 @@ CanLowerCase(char16_t ch)
return CharInfo(ch).lowerCase != 0;
}
// UTF-16 surrogate ranges. Lead (high) surrogates occupy 0xD800..0xDBFF and
// trail (low) surrogates occupy 0xDC00..0xDFFF, both bounds inclusive.
const size_t LeadSurrogateMin = 0xD800;
const size_t LeadSurrogateMax = 0xDBFF;
const size_t TrailSurrogateMin = 0xDC00;
const size_t TrailSurrogateMax = 0xDFFF;

// Largest value representable in a single UTF-16 code unit.
const size_t UTF16Max = 0xFFFF;

// Inclusive bounds of the code points outside the Basic Multilingual Plane.
const size_t NonBMPMin = 0x10000;
const size_t NonBMPMax = 0x10FFFF;

// Returns true iff |value| lies in [LeadSurrogateMin, LeadSurrogateMax].
inline bool
IsLeadSurrogate(size_t value)
{
    if (value < LeadSurrogateMin)
        return false;
    return !(value > LeadSurrogateMax);
}

// Returns true iff |value| lies in [TrailSurrogateMin, TrailSurrogateMax].
inline bool
IsTrailSurrogate(size_t value)
{
    if (value < TrailSurrogateMin)
        return false;
    return !(value > TrailSurrogateMax);
}
// Split the non-BMP code point |cp| into its UTF-16 surrogate pair, writing
// the lead surrogate to |*lead| and the trail surrogate to |*trail|.
// |cp| must lie in [NonBMPMin, NonBMPMax] (asserted).
inline void
UTF16Encode(size_t cp, size_t* lead, size_t* trail)
{
    MOZ_ASSERT(cp >= NonBMPMin && cp <= NonBMPMax);

    // Offset of the code point within the non-BMP range; each lead
    // surrogate covers a run of 1024 consecutive offsets.
    const size_t offset = cp - NonBMPMin;
    *lead = offset / 1024 + LeadSurrogateMin;
    *trail = offset % 1024 + TrailSurrogateMin;
}
// Combine a UTF-16 surrogate pair into the code point it encodes.
// |lead| must be a lead surrogate and |trail| a trail surrogate (asserted).
inline size_t
UTF16Decode(size_t lead, size_t trail)
{
    MOZ_ASSERT(IsLeadSurrogate(lead));
    MOZ_ASSERT(IsTrailSurrogate(trail));

    // Each lead surrogate selects a run of 1024 code points; the trail
    // surrogate selects the position within that run.
    const size_t highPart = (lead - LeadSurrogateMin) * 1024;
    const size_t lowPart = trail - TrailSurrogateMin;
    return highPart + lowPart + NonBMPMin;
}
} /* namespace unicode */
} /* namespace js */

View File

@ -29,11 +29,11 @@ namespace js {
*
* https://developer.mozilla.org/en-US/docs/SpiderMonkey/Internals/Bytecode
*/
static const uint32_t XDR_BYTECODE_VERSION_SUBTRAHEND = 332;
static const uint32_t XDR_BYTECODE_VERSION_SUBTRAHEND = 333;
static const uint32_t XDR_BYTECODE_VERSION =
uint32_t(0xb973c0de - XDR_BYTECODE_VERSION_SUBTRAHEND);
static_assert(JSErr_Limit == 425,
static_assert(JSErr_Limit == 428,
"GREETINGS, POTENTIAL SUBTRAHEND INCREMENTER! If you added or "
"removed MSG_DEFs from js.msg, you should increment "
"XDR_BYTECODE_VERSION_SUBTRAHEND and update this assertion's "