#393, Bug 1135377 - Part 11: Support back reference with unicode flag. r=till, f=anba

This commit is contained in:
Tooru Fujisawa
2015-12-19 04:51:21 +09:00
committed by Cameron Kaiser
parent 522d06ab34
commit 122e41a28c
5 changed files with 105 additions and 3 deletions

View File

@ -1374,6 +1374,21 @@ UnicodeCharacterClassEscapeAtom(LifoAlloc* alloc, char16_t char_class, bool igno
return UnicodeRangesAtom(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, false, false);
}
// Wraps |atom| (the back reference node built by the caller) in a sequence
// that is immediately followed by a NOT_IN_SURROGATE_PAIR assertion, and
// returns the tree produced for that sequence.
//
// Why the assertion is needed: when the text matched by a back reference ends
// with a lone lead surrogate, that lead surrogate must not be allowed to match
// a lead surrogate that is actually the first half of a lead/trail pair.
//
// All nodes are allocated from |alloc| via newInfallible.
static inline RegExpTree*
UnicodeBackReferenceAtom(LifoAlloc* alloc, RegExpTree* atom)
{
    // Start a fresh sequence whose first term is the back reference itself.
    RegExpBuilder* sequence = alloc->newInfallible<RegExpBuilder>(alloc);
    sequence->AddAtom(atom);

    // Follow it with the guard that rejects a match ending inside a
    // surrogate pair.
    RegExpAssertion* notInsidePair =
        alloc->newInfallible<RegExpAssertion>(RegExpAssertion::NOT_IN_SURROGATE_PAIR);
    sequence->AddAssertion(notInsidePair);

    return sequence->ToRegExp();
}
// Disjunction ::
// Alternative
// Alternative | Disjunction
@ -1575,7 +1590,10 @@ RegExpParser<CharT>::ParseDisjunction()
break;
}
RegExpTree* atom = alloc->newInfallible<RegExpBackReference>(capture);
builder->AddAtom(atom);
if (unicode_)
builder->AddAtom(UnicodeBackReferenceAtom(alloc, atom));
else
builder->AddAtom(atom);
break;
}
if (unicode_)