mirror of
https://github.com/classilla/tenfourfox.git
synced 2024-06-01 01:41:37 +00:00
#393, Bug 1135377 - Part 4: Support everything Atom in RegExp with unicode flag. r=till, f=anba
This commit is contained in:
parent
68f44ec410
commit
4e924a688e
|
@ -1178,6 +1178,41 @@ TrailSurrogateAtom(LifoAlloc* alloc, char16_t value)
|
|||
return builder->ToRegExp();
|
||||
}
|
||||
|
||||
// Builds the tree used for the '.' atom when the RegExp has the unicode flag.
//
// The result is a disjunction of four alternatives, added in this order:
//   1. one non-surrogate code unit other than \x0a, \x0d, \u2028 and \u2029,
//   2. a lead surrogate followed by a negative lookahead over the
//      trail-surrogate range,
//   3. a trail surrogate guarded by the NOT_AFTER_LEAD_SURROGATE assertion,
//   4. a lead surrogate immediately followed by a trail surrogate.
//
// All nodes are allocated from |alloc| via newInfallible.
static inline RegExpTree*
UnicodeEverythingAtom(LifoAlloc* alloc)
{
    RegExpBuilder* disjunction = alloc->newInfallible<RegExpBuilder>(alloc);

    // Alternative 1: everything except \x0a, \x0d, \u2028 and \u2029, with
    // the whole surrogate block left out as well (handled below).
    CharacterRangeVector* nonSurrogates =
        alloc->newInfallible<CharacterRangeVector>(*alloc);
    nonSurrogates->append(CharacterRange::Range(0x0, 0x09));
    nonSurrogates->append(CharacterRange::Range(0x0b, 0x0c));
    nonSurrogates->append(CharacterRange::Range(0x0e, 0x2027));
    nonSurrogates->append(CharacterRange::Range(0x202A, unicode::LeadSurrogateMin - 1));
    nonSurrogates->append(CharacterRange::Range(unicode::TrailSurrogateMax + 1,
                                                unicode::UTF16Max));
    RegExpTree* nonSurrogateClass =
        alloc->newInfallible<RegExpCharacterClass>(nonSurrogates, false);
    disjunction->AddAtom(nonSurrogateClass);

    // Alternative 2: a lead surrogate with no trail surrogate after it.
    disjunction->NewAlternative();
    disjunction->AddAtom(RangeAtom(alloc, unicode::LeadSurrogateMin,
                                   unicode::LeadSurrogateMax));
    disjunction->AddAtom(NegativeLookahead(alloc, unicode::TrailSurrogateMin,
                                           unicode::TrailSurrogateMax));

    // Alternative 3: a trail surrogate that does not come after a lead
    // surrogate.
    disjunction->NewAlternative();
    disjunction->AddAssertion(
        alloc->newInfallible<RegExpAssertion>(RegExpAssertion::NOT_AFTER_LEAD_SURROGATE));
    disjunction->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin,
                                   unicode::TrailSurrogateMax));

    // Alternative 4: a complete lead + trail surrogate pair.
    disjunction->NewAlternative();
    disjunction->AddAtom(RangeAtom(alloc, unicode::LeadSurrogateMin,
                                   unicode::LeadSurrogateMax));
    disjunction->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin,
                                   unicode::TrailSurrogateMax));

    return disjunction->ToRegExp();
}
|
||||
|
||||
// Disjunction ::
|
||||
// Alternative
|
||||
// Alternative | Disjunction
|
||||
|
@ -1275,6 +1310,10 @@ RegExpParser<CharT>::ParseDisjunction()
|
|||
case '.': {
|
||||
Advance();
|
||||
// everything except \x0a, \x0d, \u2028 and \u2029
|
||||
if (unicode_) {
|
||||
builder->AddAtom(UnicodeEverythingAtom(alloc));
|
||||
break;
|
||||
}
|
||||
CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
|
||||
CharacterRange::AddClassEscape(alloc, '.', ranges);
|
||||
RegExpTree* atom = alloc->newInfallible<RegExpCharacterClass>(ranges, false);
|
||||
|
|
59
js/src/tests/ecma_6/RegExp/unicode-everything.js
Normal file
59
js/src/tests/ecma_6/RegExp/unicode-everything.js
Normal file
|
@ -0,0 +1,59 @@
|
|||
// Tests for the '.' atom in RegExps that carry the unicode (u) flag.
var BUGNUMBER = 1135377;
var summary = "Implement RegExp unicode flag -- everything Atom.";

print(BUGNUMBER + ": " + summary);

// ---- A single '.' on its own ----

// Plain BMP character, and an astral code point written with \u{...}.
assertEqArray(/./u.exec("ABC"), ["A"]);
assertEqArray(/./u.exec("\u{1F438}BC"), ["\u{1F438}"]);

// Lead surrogate \uD83D followed by various second code units: only a
// second unit in \uDC00-\uDFFF is consumed together with the lead.
assertEqArray(/./u.exec("\uD83D\uDBFF"), ["\uD83D"]);
assertEqArray(/./u.exec("\uD83D\uDC00"), ["\uD83D\uDC00"]);
assertEqArray(/./u.exec("\uD83D\uDFFF"), ["\uD83D\uDFFF"]);
assertEqArray(/./u.exec("\uD83D\uE000"), ["\uD83D"]);
assertEqArray(/./u.exec("\uD83D"), ["\uD83D"]);
assertEqArray(/./u.exec("\uD83DA"), ["\uD83D"]);

// Trail surrogate \uDC38 preceded by various first code units: only a
// first unit in \uD800-\uDBFF is consumed together with the trail.
assertEqArray(/./u.exec("\uD7FF\uDC38"), ["\uD7FF"]);
assertEqArray(/./u.exec("\uD800\uDC38"), ["\uD800\uDC38"]);
assertEqArray(/./u.exec("\uDBFF\uDC38"), ["\uDBFF\uDC38"]);
assertEqArray(/./u.exec("\uDC00\uDC38"), ["\uDC00"]);
assertEqArray(/./u.exec("\uDC38"), ["\uDC38"]);
assertEqArray(/./u.exec("A\uDC38"), ["A"]);

// '.' followed by a literal "A": the match starts at the lone trail
// surrogate when the preceding unit is not a lead surrogate.
assertEqArray(/.A/u.exec("\uD7FF\uDC38A"), ["\uDC38A"]);
assertEqArray(/.A/u.exec("\uD800\uDC38A"), ["\uD800\uDC38A"]);
assertEqArray(/.A/u.exec("\uDBFF\uDC38A"), ["\uDBFF\uDC38A"]);
assertEqArray(/.A/u.exec("\uDC00\uDC38A"), ["\uDC38A"]);

// ---- Repeated '.' before a literal ----

assertEqArray(/.*A/u.exec("\u{1F438}\u{1F438}\u{1F438}A"),
              ["\u{1F438}\u{1F438}\u{1F438}A"]);

// ---- Repeated '.' after a literal ----

assertEqArray(/A.*/u.exec("A\u{1F438}\u{1F438}\u{1F438}"),
              ["A\u{1F438}\u{1F438}\u{1F438}"]);

if (typeof reportCompare === "function") {
    reportCompare(true, true);
}
|
Loading…
Reference in New Issue
Block a user