#415: M1373195 backport

2025-03-03 18:30:28 +00:00 · 2017-07-08 22:55:44 -07:00 · 2017-07-08 22:55:44 -07:00 · e880223afd
commit e880223afd
parent 824c777eef
4 changed files with 37 additions and 60 deletions
--- a/js/src/irregexp/NativeRegExpMacroAssembler.cpp.rej
+++ b/js/src/irregexp/NativeRegExpMacroAssembler.cpp.rej
@ -1,50 +0,0 @@
-diff a/js/src/irregexp/NativeRegExpMacroAssembler.cpp b/js/src/irregexp/NativeRegExpMacroAssembler.cpp	(rejected hunks)
-@@ -714,19 +714,20 @@ NativeRegExpMacroAssembler::CheckNotBack
- 
-     // Restore backtrack stack pointer.
-     masm.pop(backtrack_stack_pointer);
- 
-     masm.bind(&fallthrough);
- }
- 
- void
-NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label* on_no_match)
-+NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label* on_no_match,
-+                                                            bool unicode)
- {
-    JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d)", start_reg);
-+    JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode);
- 
-     Label fallthrough;
- 
-     masm.loadPtr(register_location(start_reg), current_character);  // Index of start of capture
-     masm.loadPtr(register_location(start_reg + 1), temp1);  // Index of end of capture
-     masm.subPtr(current_character, temp1);  // Length of capture.
- 
-     // The length of a capture should not be negative. This can only happen
-@@ -828,18 +829,23 @@ NativeRegExpMacroAssembler::CheckNotBack
-         // Parameters are
-         //   Address byte_offset1 - Address captured substring's start.
-         //   Address byte_offset2 - Address of current character position.
-         //   size_t byte_length - length of capture in bytes(!)
-         masm.setupUnalignedABICall(temp0);
-         masm.passABIArg(current_character);
-         masm.passABIArg(current_position);
-         masm.passABIArg(temp1);
-        int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings;
-        masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
-+        if (!unicode) {
-+            int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings;
-+            masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
-+        } else {
-+            int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareUCStrings;
-+            masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
-+        }
-         masm.storeCallResult(temp0);
- 
-         masm.PopRegsInMask(volatileRegs);
- 
-         // Check if function returned non-zero for success or zero for failure.
-         masm.branchTest32(Assembler::Zero, temp0, temp0, BranchOrBacktrack(on_no_match));
- 
-         // On success, increment position by length of capture.
--- a/js/src/irregexp/RegExpEngine.cpp
+++ b/js/src/irregexp/RegExpEngine.cpp
@ -116,6 +116,10 @@ static const int kSurrogateRangeCount = ArrayLength(kSurrogateRanges);
 static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E,
    0x2028, 0x202A, 0x10000 };
 static const int kLineTerminatorRangeCount = ArrayLength(kLineTerminatorRanges);
+// Bug 1373195: line-terminator boundaries plus the 0xD800/0xE000 surrogate boundaries; compared via CompareInverseRanges in TextNode::MakeCaseIndependent.
+static const int kLineTerminatorAndSurrogateRanges[] = { 0x000A, 0x000B,
+    0x000D, 0x000E, 0x2028, 0x202A, 0xD800, 0xE000, 0x10000 };
+static const int kLineTerminatorAndSurrogateRangeCount = ArrayLength(kLineTerminatorAndSurrogateRanges);
 static const int kMaxOneByteCharCode = 0xff;
 static const int kMaxUtf16CodeUnit = 0xffff;

@ -137,10 +141,10 @@ AddClass(const int* elmv, int elmc,
    }
 }

-static void
-AddClassNegated(const int* elmv,
-                int elmc,
-                CharacterRangeVector* ranges)
+void
+js::irregexp::AddClassNegated(const int* elmv,
+                              int elmc,
+                              CharacterRangeVector* ranges)
 {
    elmc--;
    MOZ_ASSERT(elmv[elmc] == 0x10000);
@ -275,7 +279,7 @@ static const size_t kEcma262UnCanonicalizeMaxWidth = 4;

 // Returns the number of characters in the equivalence class, omitting those
 // that cannot occur in the source string if it is a one byte string.
-static int
+static MOZ_ALWAYS_INLINE int
 GetCaseIndependentLetters(char16_t character,
                          bool ascii_subject,
                          bool unicode,
@ -378,6 +382,10 @@ CharacterRange::AddCaseEquivalents(bool is_ascii, bool unicode, CharacterRangeVe
            return;
        if (top > kMaxOneByteCharCode)
            top = kMaxOneByteCharCode;
+    } else {
+        // Nothing to do for surrogates.
+        if (bottom >= unicode::LeadSurrogateMin && top <= unicode::TrailSurrogateMax)
+            return;
    }

    for (char16_t c = bottom;; c++) {
@ -917,7 +925,17 @@ void TextNode::MakeCaseIndependent(bool is_ascii, bool unicode)
            if (cc->is_standard(alloc()))
                continue;

+            // Similarly, there's nothing to do for the character class
+            // containing all characters except line terminators and surrogates.
+            // This one is added by UnicodeEverythingAtom.
            CharacterRangeVector& ranges = cc->ranges(alloc());
+            if (CompareInverseRanges(ranges,
+                                     kLineTerminatorAndSurrogateRanges,
+                                     kLineTerminatorAndSurrogateRangeCount))
+            {
+                continue;
+            }
+
            int range_count = ranges.length();
            for (int j = 0; j < range_count; j++)
                ranges[j].AddCaseEquivalents(is_ascii, unicode, &ranges);
--- a/js/src/irregexp/RegExpEngine.h
+++ b/js/src/irregexp/RegExpEngine.h
@ -1508,6 +1508,9 @@ class Analysis : public NodeVisitor
    void operator=(Analysis&) = delete;
 };

+void
+AddClassNegated(const int* elmv, int elmc, CharacterRangeVector* ranges);
+
 } }  // namespace js::irregexp

 #endif  // V8_JSREGEXP_H_
--- a/js/src/irregexp/RegExpParser.cpp
+++ b/js/src/irregexp/RegExpParser.cpp
@ -35,6 +35,14 @@
 using namespace js;
 using namespace js::irregexp;

+// Bug 1373195 put these into RegExpCharacters, but we don't have that
+// in this version of irregexp, so the values must be kept in sync with
+// RegExpEngine.cpp. The count is derived from the array so it cannot drift.
+static const int kLineTerminatorAndSurrogateRanges[] = { 0x000A, 0x000B,
+    0x000D, 0x000E, 0x2028, 0x202A, 0xD800, 0xE000, 0x10000 };
+static const int kLineTerminatorAndSurrogateRangeCount = sizeof(kLineTerminatorAndSurrogateRanges) / sizeof(kLineTerminatorAndSurrogateRanges[0]);
+
+
 // ----------------------------------------------------------------------------
 // RegExpBuilder

@ -1335,11 +1343,9 @@ UnicodeEverythingAtom(LifoAlloc* alloc)
    // everything except \x0a, \x0d, \u2028 and \u2029

    CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
-    ranges->append(CharacterRange::Range(0x0, 0x09));
-    ranges->append(CharacterRange::Range(0x0b, 0x0c));
-    ranges->append(CharacterRange::Range(0x0e, 0x2027));
-    ranges->append(CharacterRange::Range(0x202A, unicode::LeadSurrogateMin - 1));
-    ranges->append(CharacterRange::Range(unicode::TrailSurrogateMax + 1, unicode::UTF16Max));
+    AddClassNegated(kLineTerminatorAndSurrogateRanges,
+                    kLineTerminatorAndSurrogateRangeCount,
+                    ranges);
    builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(ranges, false));

    builder->NewAlternative();