diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.cpp.rej b/js/src/irregexp/NativeRegExpMacroAssembler.cpp.rej deleted file mode 100644 index c0fc9b38c..000000000 --- a/js/src/irregexp/NativeRegExpMacroAssembler.cpp.rej +++ /dev/null @@ -1,50 +0,0 @@ -diff a/js/src/irregexp/NativeRegExpMacroAssembler.cpp b/js/src/irregexp/NativeRegExpMacroAssembler.cpp (rejected hunks) -@@ -714,19 +714,20 @@ NativeRegExpMacroAssembler::CheckNotBack - - // Restore backtrack stack pointer. - masm.pop(backtrack_stack_pointer); - - masm.bind(&fallthrough); - } - - void --NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label* on_no_match) -+NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label* on_no_match, -+ bool unicode) - { -- JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d)", start_reg); -+ JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode); - - Label fallthrough; - - masm.loadPtr(register_location(start_reg), current_character); // Index of start of capture - masm.loadPtr(register_location(start_reg + 1), temp1); // Index of end of capture - masm.subPtr(current_character, temp1); // Length of capture. - - // The length of a capture should not be negative. This can only happen -@@ -828,18 +829,23 @@ NativeRegExpMacroAssembler::CheckNotBack - // Parameters are - // Address byte_offset1 - Address captured substring's start. - // Address byte_offset2 - Address of current character position. - // size_t byte_length - length of capture in bytes(!) 
- masm.setupUnalignedABICall(temp0); - masm.passABIArg(current_character); - masm.passABIArg(current_position); - masm.passABIArg(temp1); -- int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings; -- masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); -+ if (!unicode) { -+ int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings; -+ masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); -+ } else { -+ int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareUCStrings; -+ masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); -+ } - masm.storeCallResult(temp0); - - masm.PopRegsInMask(volatileRegs); - - // Check if function returned non-zero for success or zero for failure. - masm.branchTest32(Assembler::Zero, temp0, temp0, BranchOrBacktrack(on_no_match)); - - // On success, increment position by length of capture. diff --git a/js/src/irregexp/RegExpEngine.cpp b/js/src/irregexp/RegExpEngine.cpp index 8ba805ce5..69da8912c 100644 --- a/js/src/irregexp/RegExpEngine.cpp +++ b/js/src/irregexp/RegExpEngine.cpp @@ -116,6 +116,10 @@ static const int kSurrogateRangeCount = ArrayLength(kSurrogateRanges); static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, 0x10000 }; static const int kLineTerminatorRangeCount = ArrayLength(kLineTerminatorRanges); +// bug 1373195 +static const int kLineTerminatorAndSurrogateRanges[] = { 0x000A, 0x000B, + 0x000D, 0x000E, 0x2028, 0x202A, 0xD800, 0xE000, 0x10000 }; +static const int kLineTerminatorAndSurrogateRangeCount = ArrayLength(kLineTerminatorAndSurrogateRanges); static const int kMaxOneByteCharCode = 0xff; static const int kMaxUtf16CodeUnit = 0xffff; @@ -137,10 +141,10 @@ AddClass(const int* elmv, int elmc, } } -static void -AddClassNegated(const int* elmv, - int elmc, - CharacterRangeVector* ranges) +void +js::irregexp::AddClassNegated(const int* elmv, + int elmc, + CharacterRangeVector* ranges) { elmc--; MOZ_ASSERT(elmv[elmc] == 
0x10000); @@ -275,7 +279,7 @@ static const size_t kEcma262UnCanonicalizeMaxWidth = 4; // Returns the number of characters in the equivalence class, omitting those // that cannot occur in the source string if it is a one byte string. -static int +static MOZ_ALWAYS_INLINE int GetCaseIndependentLetters(char16_t character, bool ascii_subject, bool unicode, @@ -378,6 +382,10 @@ CharacterRange::AddCaseEquivalents(bool is_ascii, bool unicode, CharacterRangeVe return; if (top > kMaxOneByteCharCode) top = kMaxOneByteCharCode; + } else { + // Nothing to do for surrogates. + if (bottom >= unicode::LeadSurrogateMin && top <= unicode::TrailSurrogateMax) + return; } for (char16_t c = bottom;; c++) { @@ -917,7 +925,17 @@ void TextNode::MakeCaseIndependent(bool is_ascii, bool unicode) if (cc->is_standard(alloc())) continue; + // Similarly, there's nothing to do for the character class + // containing all characters except line terminators and surrogates. + // This one is added by UnicodeEverythingAtom. 
CharacterRangeVector& ranges = cc->ranges(alloc()); + if (CompareInverseRanges(ranges, + kLineTerminatorAndSurrogateRanges, + kLineTerminatorAndSurrogateRangeCount)) + { + continue; + } + int range_count = ranges.length(); for (int j = 0; j < range_count; j++) ranges[j].AddCaseEquivalents(is_ascii, unicode, &ranges); diff --git a/js/src/irregexp/RegExpEngine.h b/js/src/irregexp/RegExpEngine.h index ca315c2ad..9d026e25c 100644 --- a/js/src/irregexp/RegExpEngine.h +++ b/js/src/irregexp/RegExpEngine.h @@ -1508,6 +1508,9 @@ class Analysis : public NodeVisitor void operator=(Analysis&) = delete; }; +void +AddClassNegated(const int* elmv, int elmc, CharacterRangeVector* ranges); + } } // namespace js::irregexp #endif // V8_JSREGEXP_H_ diff --git a/js/src/irregexp/RegExpParser.cpp b/js/src/irregexp/RegExpParser.cpp index d452178e6..445d9f0a6 100644 --- a/js/src/irregexp/RegExpParser.cpp +++ b/js/src/irregexp/RegExpParser.cpp @@ -35,6 +35,14 @@ using namespace js; using namespace js::irregexp; +// Bug 1373195 put these into RegExpCharacters, but we don't have that +// in this version of irregexp, so this should be kept in sync with +// RegExpEngine. 
+static const int kLineTerminatorAndSurrogateRanges[] = { 0x000A, 0x000B, + 0x000D, 0x000E, 0x2028, 0x202A, 0xD800, 0xE000, 0x10000 }; +static const int kLineTerminatorAndSurrogateRangeCount = sizeof(kLineTerminatorAndSurrogateRanges) / sizeof(kLineTerminatorAndSurrogateRanges[0]); + + // ---------------------------------------------------------------------------- // RegExpBuilder @@ -1335,11 +1343,9 @@ UnicodeEverythingAtom(LifoAlloc* alloc) // everything except \x0a, \x0d, \u2028 and \u2029 CharacterRangeVector* ranges = alloc->newInfallible(*alloc); - ranges->append(CharacterRange::Range(0x0, 0x09)); - ranges->append(CharacterRange::Range(0x0b, 0x0c)); - ranges->append(CharacterRange::Range(0x0e, 0x2027)); - ranges->append(CharacterRange::Range(0x202A, unicode::LeadSurrogateMin - 1)); - ranges->append(CharacterRange::Range(unicode::TrailSurrogateMax + 1, unicode::UTF16Max)); + AddClassNegated(kLineTerminatorAndSurrogateRanges, + kLineTerminatorAndSurrogateRangeCount, + ranges); builder->AddAtom(alloc->newInfallible(ranges, false)); builder->NewAlternative();