diff --git a/js/src/builtin/RegExp.cpp b/js/src/builtin/RegExp.cpp index 2991118eb..909f58a0a 100644 --- a/js/src/builtin/RegExp.cpp +++ b/js/src/builtin/RegExp.cpp @@ -176,8 +176,11 @@ RegExpInitializeIgnoringLastIndex(JSContext* cx, Handle obj, /* Steps 8-10. */ CompileOptions options(cx); frontend::TokenStream dummyTokenStream(cx, options, nullptr, 0, nullptr); - if (!irregexp::ParsePatternSyntax(dummyTokenStream, cx->tempLifoAlloc(), pattern)) + if (!irregexp::ParsePatternSyntax(dummyTokenStream, cx->tempLifoAlloc(), pattern, + flags & UnicodeFlag)) + { return false; + } if (staticsUse == UseRegExpStatics) { RegExpStatics* res = cx->global()->getRegExpStatics(cx); @@ -561,6 +564,24 @@ regexp_sticky(JSContext* cx, unsigned argc, JS::Value* vp) return CallNonGenericMethod(cx, args); } +/* ES6 21.2.5.15. */ +MOZ_ALWAYS_INLINE bool +regexp_unicode_impl(JSContext* cx, const CallArgs& args) +{ + MOZ_ASSERT(IsRegExpObject(args.thisv())); + /* Steps 4-6. */ + args.rval().setBoolean(args.thisv().toObject().as().unicode()); + return true; +} + +static bool +regexp_unicode(JSContext* cx, unsigned argc, JS::Value* vp) +{ + /* Steps 1-3. */ + CallArgs args = CallArgsFromVp(argc, vp); + return CallNonGenericMethod(cx, args); +} + const JSPropertySpec js::regexp_properties[] = { JS_SELF_HOSTED_GET("flags", "RegExpFlagsGetter", 0), JS_PSG("global", regexp_global, 0), @@ -568,6 +589,7 @@ const JSPropertySpec js::regexp_properties[] = { JS_PSG("multiline", regexp_multiline, 0), JS_PSG("source", regexp_source, 0), JS_PSG("sticky", regexp_sticky, 0), + JS_PSG("unicode", regexp_unicode, 0), JS_PS_END }; diff --git a/js/src/builtin/RegExp.js b/js/src/builtin/RegExp.js index 13cc5ba89..7b002dd47 100644 --- a/js/src/builtin/RegExp.js +++ b/js/src/builtin/RegExp.js @@ -25,9 +25,8 @@ function RegExpFlagsGetter() { result += "m"; // Steps 13-15. - // TODO: Uncomment these steps when bug 1135377 is fixed. - // if (R.unicode) - // result += "u"; + if (R.unicode) + result += "u"; // Steps 16-18. if (R.sticky) diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp index 1570f4730..147c8128d 100644 --- a/js/src/frontend/TokenStream.cpp +++ b/js/src/frontend/TokenStream.cpp @@ -1582,6 +1582,8 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier) reflags = RegExpFlag(reflags | MultilineFlag); else if (c == 'y' && !(reflags & StickyFlag)) reflags = RegExpFlag(reflags | StickyFlag); + else if (c == 'u' && !(reflags & UnicodeFlag)) + reflags = RegExpFlag(reflags | UnicodeFlag); else break; getChar(); diff --git a/js/src/irregexp/RegExpParser.cpp b/js/src/irregexp/RegExpParser.cpp index e63092349..6ab53c59f 100644 --- a/js/src/irregexp/RegExpParser.cpp +++ b/js/src/irregexp/RegExpParser.cpp @@ -205,7 +205,8 @@ RegExpBuilder::AddQuantifierToAtom(int min, int max, template RegExpParser::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc, - const CharT* chars, const CharT* end, bool multiline_mode) + const CharT* chars, const CharT* end, bool multiline_mode, + bool unicode) : ts(ts), alloc(alloc), captures_(nullptr), @@ -215,6 +216,7 @@ RegExpParser::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc, capture_count_(0), has_more_(true), multiline_(multiline_mode), + unicode_(unicode), simple_(false), contains_anchor_(false), is_scanned_for_captures_(false) @@ -1002,7 +1004,7 @@ template class irregexp::RegExpParser; template static bool ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length, - bool multiline, bool match_only, RegExpCompileData* data) + bool multiline, bool match_only, bool unicode, RegExpCompileData* data) { if (match_only) { // Try to strip a leading '.*' from the RegExp, but only if it is not @@ -1025,7 +1027,7 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si } } - RegExpParser parser(ts, &alloc, chars, chars + length, multiline); + RegExpParser parser(ts, &alloc, chars, chars + length, multiline, unicode); data->tree = parser.ParsePattern(); if (!data->tree) return false; @@ -1038,32 +1040,34 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si bool irregexp::ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str, - bool multiline, bool match_only, + bool multiline, bool match_only, bool unicode, RegExpCompileData* data) { JS::AutoCheckCannotGC nogc; return str->hasLatin1Chars() ? ::ParsePattern(ts, alloc, str->latin1Chars(nogc), str->length(), - multiline, match_only, data) + multiline, match_only, unicode, data) : ::ParsePattern(ts, alloc, str->twoByteChars(nogc), str->length(), - multiline, match_only, data); + multiline, match_only, unicode, data); } template static bool -ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length) +ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length, + bool unicode) { LifoAllocScope scope(&alloc); - RegExpParser parser(ts, &alloc, chars, chars + length, false); + RegExpParser parser(ts, &alloc, chars, chars + length, false, unicode); return parser.ParsePattern() != nullptr; } bool -irregexp::ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str) +irregexp::ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str, + bool unicode) { JS::AutoCheckCannotGC nogc; return str->hasLatin1Chars() - ? ::ParsePatternSyntax(ts, alloc, str->latin1Chars(nogc), str->length()) - : ::ParsePatternSyntax(ts, alloc, str->twoByteChars(nogc), str->length()); + ? ::ParsePatternSyntax(ts, alloc, str->latin1Chars(nogc), str->length(), unicode) + : ::ParsePatternSyntax(ts, alloc, str->twoByteChars(nogc), str->length(), unicode); } diff --git a/js/src/irregexp/RegExpParser.h b/js/src/irregexp/RegExpParser.h index e36a47f75..740bff927 100644 --- a/js/src/irregexp/RegExpParser.h +++ b/js/src/irregexp/RegExpParser.h @@ -43,11 +43,12 @@ namespace irregexp { bool ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str, - bool multiline, bool match_only, + bool multiline, bool match_only, bool unicode, RegExpCompileData* data); bool -ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str); +ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str, + bool unicode); // A BufferedVector is an automatically growing list, just like (and backed // by) a Vector, that is optimized for the case of adding and removing @@ -174,7 +175,7 @@ class RegExpParser { public: RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc, - const CharT* chars, const CharT* end, bool multiline_mode); + const CharT* chars, const CharT* end, bool multiline_mode, bool unicode); RegExpTree* ParsePattern(); RegExpTree* ParseDisjunction(); @@ -288,6 +289,7 @@ class RegExpParser int capture_count_; bool has_more_; bool multiline_; + bool unicode_; bool simple_; bool contains_anchor_; bool is_scanned_for_captures_; diff --git a/js/src/jsapi.h b/js/src/jsapi.h index 77aa95251..c094a7af0 100644 --- a/js/src/jsapi.h +++ b/js/src/jsapi.h @@ -4959,6 +4959,7 @@ JS_ObjectIsDate(JSContext* cx, JS::HandleObject obj, bool* isDate); #define JSREG_GLOB 0x02u /* global exec, creates array of matches */ #define JSREG_MULTILINE 0x04u /* treat ^ and $ as begin and end of line */ #define JSREG_STICKY 0x08u /* only match starting at lastIndex */ +#define JSREG_UNICODE 0x10u /* unicode */ extern JS_PUBLIC_API(JSObject*) JS_NewRegExpObject(JSContext* cx, JS::HandleObject obj, const char* bytes, size_t length, diff --git a/js/src/tests/ecma_6/RegExp/descriptor.js b/js/src/tests/ecma_6/RegExp/descriptor.js index 62dda8521..cc545b3a6 100644 --- a/js/src/tests/ecma_6/RegExp/descriptor.js +++ b/js/src/tests/ecma_6/RegExp/descriptor.js @@ -10,7 +10,7 @@ var getters = [ "multiline", "source", "sticky", - //"unicode", + "unicode", ]; for (var name of getters) { @@ -21,9 +21,5 @@ for (var name of getters) { assertEq("get" in desc, true); } -// When the /u flag is supported, remove this comment and the next line, and -// uncomment "unicode" in |props| above. -assertThrowsInstanceOf(() => RegExp("", "mygui").flags, SyntaxError); - if (typeof reportCompare === "function") reportCompare(true, true); diff --git a/js/src/tests/ecma_6/RegExp/flag-accessors.js b/js/src/tests/ecma_6/RegExp/flag-accessors.js index 4f169c8a4..848b916c5 100644 --- a/js/src/tests/ecma_6/RegExp/flag-accessors.js +++ b/js/src/tests/ecma_6/RegExp/flag-accessors.js @@ -8,17 +8,14 @@ var props = [ "ignoreCase", "multiline", "sticky", - //"unicode", + "unicode", ]; testThrows(RegExp.prototype); test(/foo/iymg, [true, true, true, true, false]); test(RegExp(""), [false, false, false, false, false]); test(RegExp("", "mygi"), [true, true, true, true, false]); -// When the /u flag is supported, remove the following line, uncomment the -// next line, and uncomment "unicode" in |props| above. -assertThrowsInstanceOf(() => RegExp("", "mygui").flags, SyntaxError); -// test(RegExp("", "mygiu"), [true, true, true, true, true]); +test(RegExp("", "mygiu"), [true, true, true, true, true]); testThrowsGeneric(); testThrowsGeneric(1); diff --git a/js/src/tests/ecma_6/RegExp/flags.js b/js/src/tests/ecma_6/RegExp/flags.js index 5a0245d99..292fa4f3f 100644 --- a/js/src/tests/ecma_6/RegExp/flags.js +++ b/js/src/tests/ecma_6/RegExp/flags.js @@ -7,16 +7,12 @@ assertEq(RegExp.prototype.flags, ""); assertEq(/foo/iymg.flags, "gimy"); assertEq(RegExp("").flags, ""); assertEq(RegExp("", "mygi").flags, "gimy"); -// TODO: Uncomment lines 12, 16, 19 and remove lines 11, 15, 18 when bug 1135377 is fixed. -assertThrowsInstanceOf(() => RegExp("", "mygui").flags, SyntaxError); -// assertEq(RegExp("", "mygui").flags, "gimuy"); +assertEq(RegExp("", "mygui").flags, "gimuy"); assertEq(genericFlags({}), ""); assertEq(genericFlags({ignoreCase: true}), "i"); -assertEq(genericFlags({sticky:1, unicode:1, global: 0}), "y"); -// assertEq(genericFlags({sticky:1, unicode:1, global: 0}), "uy"); +assertEq(genericFlags({sticky:1, unicode:1, global: 0}), "uy"); assertEq(genericFlags({__proto__: {multiline: true}}), "m"); -assertEq(genericFlags(new Proxy({}, {get(){return true}})), "gimy"); -// assertEq(genericFlags(new Proxy({}, {get(){return true}})), "gimuy"); +assertEq(genericFlags(new Proxy({}, {get(){return true}})), "gimuy"); assertThrowsInstanceOf(() => genericFlags(), TypeError); assertThrowsInstanceOf(() => genericFlags(1), TypeError); diff --git a/js/src/vm/RegExpObject.cpp b/js/src/vm/RegExpObject.cpp index 9d0dd97d0..1faf252e7 100644 --- a/js/src/vm/RegExpObject.cpp +++ b/js/src/vm/RegExpObject.cpp @@ -39,6 +39,7 @@ JS_STATIC_ASSERT(IgnoreCaseFlag == JSREG_FOLD); JS_STATIC_ASSERT(GlobalFlag == JSREG_GLOB); JS_STATIC_ASSERT(MultilineFlag == JSREG_MULTILINE); JS_STATIC_ASSERT(StickyFlag == JSREG_STICKY); +JS_STATIC_ASSERT(UnicodeFlag == JSREG_UNICODE); RegExpObject* js::RegExpAlloc(ExclusiveContext* cx, HandleObject proto /* = nullptr */) @@ -219,7 +220,7 @@ RegExpObject::createNoStatics(ExclusiveContext* cx, HandleAtom source, RegExpFla tokenStream = dummyTokenStream.ptr(); } - if (!irregexp::ParsePatternSyntax(*tokenStream, alloc, source)) + if (!irregexp::ParsePatternSyntax(*tokenStream, alloc, source, flags & UnicodeFlag)) return nullptr; Rooted regexp(cx, RegExpAlloc(cx)); @@ -267,6 +268,7 @@ RegExpObject::initIgnoringLastIndex(HandleAtom source, RegExpFlag flags) setIgnoreCase(flags & IgnoreCaseFlag); setMultiline(flags & MultilineFlag); setSticky(flags & StickyFlag); + setUnicode(flags & UnicodeFlag); } void @@ -455,6 +457,8 @@ RegExpObject::toString(JSContext* cx) const return nullptr; if (multiline() && !sb.append('m')) return nullptr; + if (unicode() && !sb.append('u')) + return nullptr; if (sticky() && !sb.append('y')) return nullptr; @@ -515,7 +519,7 @@ RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString inpu /* Parse the pattern. */ irregexp::RegExpCompileData data; if (!irregexp::ParsePattern(dummyTokenStream, cx->tempLifoAlloc(), pattern, - multiline(), mode == MatchOnly, &data)) + multiline(), mode == MatchOnly, unicode(), &data)) { return false; } @@ -949,6 +953,10 @@ ParseRegExpFlags(const CharT* chars, size_t length, RegExpFlag* flagsOut, char16 if (!HandleRegExpFlag(StickyFlag, flagsOut)) return false; break; + case 'u': + if (!HandleRegExpFlag(UnicodeFlag, flagsOut)) + return false; + break; default: return false; } diff --git a/js/src/vm/RegExpObject.h b/js/src/vm/RegExpObject.h index 24e88deb5..dbf9efa98 100644 --- a/js/src/vm/RegExpObject.h +++ b/js/src/vm/RegExpObject.h @@ -51,9 +51,10 @@ enum RegExpFlag GlobalFlag = 0x02, MultilineFlag = 0x04, StickyFlag = 0x08, + UnicodeFlag = 0x10, NoFlags = 0x00, - AllFlags = 0x0f + AllFlags = 0x1f }; enum RegExpRunStatus @@ -186,6 +187,7 @@ class RegExpShared bool global() const { return flags & GlobalFlag; } bool multiline() const { return flags & MultilineFlag; } bool sticky() const { return flags & StickyFlag; } + bool unicode() const { return flags & UnicodeFlag; } bool isCompiled(CompilationMode mode, bool latin1, ForceByteCodeEnum force = DontForceByteCode) const { @@ -340,9 +342,10 @@ class RegExpObject : public NativeObject static const unsigned IGNORE_CASE_FLAG_SLOT = 3; static const unsigned MULTILINE_FLAG_SLOT = 4; static const unsigned STICKY_FLAG_SLOT = 5; + static const unsigned UNICODE_FLAG_SLOT = 6; public: - static const unsigned RESERVED_SLOTS = 6; + static const unsigned RESERVED_SLOTS = 7; static const unsigned PRIVATE_SLOT = 7; static const Class class_; @@ -407,6 +410,7 @@ class RegExpObject : public NativeObject flags |= ignoreCase() ? IgnoreCaseFlag : 0; flags |= multiline() ? MultilineFlag : 0; flags |= sticky() ? StickyFlag : 0; + flags |= unicode() ? UnicodeFlag : 0; return RegExpFlag(flags); } @@ -432,10 +436,15 @@ class RegExpObject : public NativeObject setSlot(STICKY_FLAG_SLOT, BooleanValue(enabled)); } + void setUnicode(bool enabled) { + setSlot(UNICODE_FLAG_SLOT, BooleanValue(enabled)); + } + bool ignoreCase() const { return getFixedSlot(IGNORE_CASE_FLAG_SLOT).toBoolean(); } bool global() const { return getFixedSlot(GLOBAL_FLAG_SLOT).toBoolean(); } bool multiline() const { return getFixedSlot(MULTILINE_FLAG_SLOT).toBoolean(); } bool sticky() const { return getFixedSlot(STICKY_FLAG_SLOT).toBoolean(); } + bool unicode() const { return getFixedSlot(UNICODE_FLAG_SLOT).toBoolean(); } bool getShared(JSContext* cx, RegExpGuard* g); diff --git a/js/xpconnect/tests/chrome/test_xrayToJS.xul b/js/xpconnect/tests/chrome/test_xrayToJS.xul index f600dd0ee..cbcd427d2 100644 --- a/js/xpconnect/tests/chrome/test_xrayToJS.xul +++ b/js/xpconnect/tests/chrome/test_xrayToJS.xul @@ -198,7 +198,7 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=933681 gPrototypeProperties['RegExp'] = ["constructor", "toSource", "toString", "compile", "exec", "test", - "flags", "global", "ignoreCase", "multiline", "source", "sticky", + "flags", "global", "ignoreCase", "multiline", "source", "sticky", "unicode", "lastIndex"]; // Sort an array that may contain symbols as well as strings. @@ -612,7 +612,7 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=933681 // Test with modified flags accessors iwin.eval(` -var props = ["global", "ignoreCase", "multiline", "sticky", "source"]; +var props = ["global", "ignoreCase", "multiline", "sticky", "source", "unicode"]; var origDescs = {}; for (var prop of props) { origDescs[prop] = Object.getOwnPropertyDescriptor(RegExp.prototype, prop);