#415: M1373195 backport

This commit is contained in:
Cameron Kaiser 2017-07-08 22:55:44 -07:00
parent 824c777eef
commit e880223afd
4 changed files with 37 additions and 60 deletions

View File

@ -1,50 +0,0 @@
diff a/js/src/irregexp/NativeRegExpMacroAssembler.cpp b/js/src/irregexp/NativeRegExpMacroAssembler.cpp (rejected hunks)
@@ -714,19 +714,20 @@ NativeRegExpMacroAssembler::CheckNotBack
// Restore backtrack stack pointer.
masm.pop(backtrack_stack_pointer);
masm.bind(&fallthrough);
}
void
-NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label* on_no_match)
+NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label* on_no_match,
+ bool unicode)
{
- JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d)", start_reg);
+ JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode);
Label fallthrough;
masm.loadPtr(register_location(start_reg), current_character); // Index of start of capture
masm.loadPtr(register_location(start_reg + 1), temp1); // Index of end of capture
masm.subPtr(current_character, temp1); // Length of capture.
// The length of a capture should not be negative. This can only happen
@@ -828,18 +829,23 @@ NativeRegExpMacroAssembler::CheckNotBack
// Parameters are
// Address byte_offset1 - Address captured substring's start.
// Address byte_offset2 - Address of current character position.
// size_t byte_length - length of capture in bytes(!)
masm.setupUnalignedABICall(temp0);
masm.passABIArg(current_character);
masm.passABIArg(current_position);
masm.passABIArg(temp1);
- int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings;
- masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
+ if (!unicode) {
+ int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings;
+ masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
+ } else {
+ int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareUCStrings;
+ masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
+ }
masm.storeCallResult(temp0);
masm.PopRegsInMask(volatileRegs);
// Check if function returned non-zero for success or zero for failure.
masm.branchTest32(Assembler::Zero, temp0, temp0, BranchOrBacktrack(on_no_match));
// On success, increment position by length of capture.

View File

@ -116,6 +116,10 @@ static const int kSurrogateRangeCount = ArrayLength(kSurrogateRanges);
static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E,
0x2028, 0x202A, 0x10000 };
static const int kLineTerminatorRangeCount = ArrayLength(kLineTerminatorRanges);
// Bug 1373195: boundary table for the line terminators and the surrogate
// code units, i.e. U+000A, U+000D, U+2028-U+2029 and U+D800-U+DFFF. Values
// come in pairs of an inclusive start and an exclusive end; the trailing
// 0x10000 is the terminator that AddClassNegated asserts on.
// TextNode::MakeCaseIndependent compares character classes against the
// inverse of this table to recognise the class added by UnicodeEverythingAtom.
static const int kLineTerminatorAndSurrogateRanges[] = { 0x000A, 0x000B,
0x000D, 0x000E, 0x2028, 0x202A, 0xD800, 0xE000, 0x10000 };
static const int kLineTerminatorAndSurrogateRangeCount = ArrayLength(kLineTerminatorAndSurrogateRanges);
static const int kMaxOneByteCharCode = 0xff;
static const int kMaxUtf16CodeUnit = 0xffff;
@ -137,10 +141,10 @@ AddClass(const int* elmv, int elmc,
}
}
static void
AddClassNegated(const int* elmv,
int elmc,
CharacterRangeVector* ranges)
void
js::irregexp::AddClassNegated(const int* elmv,
int elmc,
CharacterRangeVector* ranges)
{
elmc--;
MOZ_ASSERT(elmv[elmc] == 0x10000);
@ -275,7 +279,7 @@ static const size_t kEcma262UnCanonicalizeMaxWidth = 4;
// Returns the number of characters in the equivalence class, omitting those
// that cannot occur in the source string if it is a one byte string.
static int
static MOZ_ALWAYS_INLINE int
GetCaseIndependentLetters(char16_t character,
bool ascii_subject,
bool unicode,
@ -378,6 +382,10 @@ CharacterRange::AddCaseEquivalents(bool is_ascii, bool unicode, CharacterRangeVe
return;
if (top > kMaxOneByteCharCode)
top = kMaxOneByteCharCode;
} else {
// Nothing to do for surrogates.
if (bottom >= unicode::LeadSurrogateMin && top <= unicode::TrailSurrogateMax)
return;
}
for (char16_t c = bottom;; c++) {
@ -917,7 +925,17 @@ void TextNode::MakeCaseIndependent(bool is_ascii, bool unicode)
if (cc->is_standard(alloc()))
continue;
// Similarly, there's nothing to do for the character class
// containing all characters except line terminators and surrogates.
// This one is added by UnicodeEverythingAtom.
CharacterRangeVector& ranges = cc->ranges(alloc());
if (CompareInverseRanges(ranges,
kLineTerminatorAndSurrogateRanges,
kLineTerminatorAndSurrogateRangeCount))
{
continue;
}
int range_count = ranges.length();
for (int j = 0; j < range_count; j++)
ranges[j].AddCaseEquivalents(is_ascii, unicode, &ranges);

View File

@ -1508,6 +1508,9 @@ class Analysis : public NodeVisitor
void operator=(Analysis&) = delete;
};
// Exported so that callers outside the defining file can use it with a
// boundary table |elmv| of |elmc| entries. The definition asserts that the
// last entry of |elmv| is 0x10000.
// NOTE(review): judging by the name and by its use in UnicodeEverythingAtom,
// this appends to |ranges| the complement of the ranges listed in |elmv| --
// confirm against the definition.
void
AddClassNegated(const int* elmv, int elmc, CharacterRangeVector* ranges);
} } // namespace js::irregexp
#endif // V8_JSREGEXP_H_

View File

@ -35,6 +35,14 @@
using namespace js;
using namespace js::irregexp;
// Bug 1373195 put these into RegExpCharacters, but we don't have that
// in this version of irregexp, so this should be kept in sync with
// RegExpEngine.
//
// The table lists the line terminators and the surrogate code units as
// pairs of an inclusive start and an exclusive end (U+000A, U+000D,
// U+2028-U+2029, U+D800-U+DFFF), followed by a terminating 0x10000.
static const int kLineTerminatorAndSurrogateRanges[] = { 0x000A, 0x000B,
0x000D, 0x000E, 0x2028, 0x202A, 0xD800, 0xE000, 0x10000 };
// Derived from the array rather than hard-coded, so that editing the table
// above cannot leave a stale element count behind.
static const int kLineTerminatorAndSurrogateRangeCount =
    static_cast<int>(sizeof(kLineTerminatorAndSurrogateRanges) /
                     sizeof(kLineTerminatorAndSurrogateRanges[0]));
// ----------------------------------------------------------------------------
// RegExpBuilder
@ -1335,11 +1343,9 @@ UnicodeEverythingAtom(LifoAlloc* alloc)
// everything except \x0a, \x0d, \u2028 and \u2029
CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
ranges->append(CharacterRange::Range(0x0, 0x09));
ranges->append(CharacterRange::Range(0x0b, 0x0c));
ranges->append(CharacterRange::Range(0x0e, 0x2027));
ranges->append(CharacterRange::Range(0x202A, unicode::LeadSurrogateMin - 1));
ranges->append(CharacterRange::Range(unicode::TrailSurrogateMax + 1, unicode::UTF16Max));
AddClassNegated(kLineTerminatorAndSurrogateRanges,
kLineTerminatorAndSurrogateRangeCount,
ranges);
builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(ranges, false));
builder->NewAlternative();