#393, Bug 1135377 - Part 1: Implement RegExp unicode flag. r=till, f=anba

This commit is contained in:
Tooru Fujisawa 2015-08-07 08:10:05 +09:00 committed by Cameron Kaiser
parent ac6710b0fd
commit d00063089b
12 changed files with 77 additions and 41 deletions

View File

@ -176,8 +176,11 @@ RegExpInitializeIgnoringLastIndex(JSContext* cx, Handle<RegExpObject*> obj,
/* Steps 8-10. */ /* Steps 8-10. */
CompileOptions options(cx); CompileOptions options(cx);
frontend::TokenStream dummyTokenStream(cx, options, nullptr, 0, nullptr); frontend::TokenStream dummyTokenStream(cx, options, nullptr, 0, nullptr);
if (!irregexp::ParsePatternSyntax(dummyTokenStream, cx->tempLifoAlloc(), pattern)) if (!irregexp::ParsePatternSyntax(dummyTokenStream, cx->tempLifoAlloc(), pattern,
flags & UnicodeFlag))
{
return false; return false;
}
if (staticsUse == UseRegExpStatics) { if (staticsUse == UseRegExpStatics) {
RegExpStatics* res = cx->global()->getRegExpStatics(cx); RegExpStatics* res = cx->global()->getRegExpStatics(cx);
@ -561,6 +564,24 @@ regexp_sticky(JSContext* cx, unsigned argc, JS::Value* vp)
return CallNonGenericMethod<IsRegExpObject, regexp_sticky_impl>(cx, args); return CallNonGenericMethod<IsRegExpObject, regexp_sticky_impl>(cx, args);
} }
/*
 * ES6 21.2.5.15.
 *
 * Implementation half of the "unicode" accessor. regexp_unicode dispatches
 * here through CallNonGenericMethod with IsRegExpObject as the guard; the
 * assertion below re-checks that guard on |this|.
 */
MOZ_ALWAYS_INLINE bool
regexp_unicode_impl(JSContext* cx, const CallArgs& args)
{
    MOZ_ASSERT(IsRegExpObject(args.thisv()));

    /* Steps 4-6. */
    const bool unicodeFlag = args.thisv().toObject().as<RegExpObject>().unicode();
    args.rval().setBoolean(unicodeFlag);
    return true;
}
/*
 * Native entry point registered as the getter for the "unicode" property in
 * js::regexp_properties. Forwards to regexp_unicode_impl via
 * CallNonGenericMethod and returns its result.
 */
static bool
regexp_unicode(JSContext* cx, unsigned argc, JS::Value* vp)
{
    /* Steps 1-3. */
    CallArgs callArgs = CallArgsFromVp(argc, vp);
    return CallNonGenericMethod<IsRegExpObject, regexp_unicode_impl>(cx, callArgs);
}
const JSPropertySpec js::regexp_properties[] = { const JSPropertySpec js::regexp_properties[] = {
JS_SELF_HOSTED_GET("flags", "RegExpFlagsGetter", 0), JS_SELF_HOSTED_GET("flags", "RegExpFlagsGetter", 0),
JS_PSG("global", regexp_global, 0), JS_PSG("global", regexp_global, 0),
@ -568,6 +589,7 @@ const JSPropertySpec js::regexp_properties[] = {
JS_PSG("multiline", regexp_multiline, 0), JS_PSG("multiline", regexp_multiline, 0),
JS_PSG("source", regexp_source, 0), JS_PSG("source", regexp_source, 0),
JS_PSG("sticky", regexp_sticky, 0), JS_PSG("sticky", regexp_sticky, 0),
JS_PSG("unicode", regexp_unicode, 0),
JS_PS_END JS_PS_END
}; };

View File

@ -25,9 +25,8 @@ function RegExpFlagsGetter() {
result += "m"; result += "m";
// Steps 13-15. // Steps 13-15.
// TODO: Uncomment these steps when bug 1135377 is fixed. if (R.unicode)
// if (R.unicode) result += "u";
// result += "u";
// Steps 16-18. // Steps 16-18.
if (R.sticky) if (R.sticky)

View File

@ -1582,6 +1582,8 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
reflags = RegExpFlag(reflags | MultilineFlag); reflags = RegExpFlag(reflags | MultilineFlag);
else if (c == 'y' && !(reflags & StickyFlag)) else if (c == 'y' && !(reflags & StickyFlag))
reflags = RegExpFlag(reflags | StickyFlag); reflags = RegExpFlag(reflags | StickyFlag);
else if (c == 'u' && !(reflags & UnicodeFlag))
reflags = RegExpFlag(reflags | UnicodeFlag);
else else
break; break;
getChar(); getChar();

View File

@ -205,7 +205,8 @@ RegExpBuilder::AddQuantifierToAtom(int min, int max,
template <typename CharT> template <typename CharT>
RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc, RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
const CharT* chars, const CharT* end, bool multiline_mode) const CharT* chars, const CharT* end, bool multiline_mode,
bool unicode)
: ts(ts), : ts(ts),
alloc(alloc), alloc(alloc),
captures_(nullptr), captures_(nullptr),
@ -215,6 +216,7 @@ RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
capture_count_(0), capture_count_(0),
has_more_(true), has_more_(true),
multiline_(multiline_mode), multiline_(multiline_mode),
unicode_(unicode),
simple_(false), simple_(false),
contains_anchor_(false), contains_anchor_(false),
is_scanned_for_captures_(false) is_scanned_for_captures_(false)
@ -1002,7 +1004,7 @@ template class irregexp::RegExpParser<char16_t>;
template <typename CharT> template <typename CharT>
static bool static bool
ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length, ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length,
bool multiline, bool match_only, RegExpCompileData* data) bool multiline, bool match_only, bool unicode, RegExpCompileData* data)
{ {
if (match_only) { if (match_only) {
// Try to strip a leading '.*' from the RegExp, but only if it is not // Try to strip a leading '.*' from the RegExp, but only if it is not
@ -1025,7 +1027,7 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si
} }
} }
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, multiline); RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, multiline, unicode);
data->tree = parser.ParsePattern(); data->tree = parser.ParsePattern();
if (!data->tree) if (!data->tree)
return false; return false;
@ -1038,32 +1040,34 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si
bool bool
irregexp::ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str, irregexp::ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
bool multiline, bool match_only, bool multiline, bool match_only, bool unicode,
RegExpCompileData* data) RegExpCompileData* data)
{ {
JS::AutoCheckCannotGC nogc; JS::AutoCheckCannotGC nogc;
return str->hasLatin1Chars() return str->hasLatin1Chars()
? ::ParsePattern(ts, alloc, str->latin1Chars(nogc), str->length(), ? ::ParsePattern(ts, alloc, str->latin1Chars(nogc), str->length(),
multiline, match_only, data) multiline, match_only, unicode, data)
: ::ParsePattern(ts, alloc, str->twoByteChars(nogc), str->length(), : ::ParsePattern(ts, alloc, str->twoByteChars(nogc), str->length(),
multiline, match_only, data); multiline, match_only, unicode, data);
} }
template <typename CharT> template <typename CharT>
static bool static bool
ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length) ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length,
bool unicode)
{ {
LifoAllocScope scope(&alloc); LifoAllocScope scope(&alloc);
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, false); RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, false, unicode);
return parser.ParsePattern() != nullptr; return parser.ParsePattern() != nullptr;
} }
bool bool
irregexp::ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str) irregexp::ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
bool unicode)
{ {
JS::AutoCheckCannotGC nogc; JS::AutoCheckCannotGC nogc;
return str->hasLatin1Chars() return str->hasLatin1Chars()
? ::ParsePatternSyntax(ts, alloc, str->latin1Chars(nogc), str->length()) ? ::ParsePatternSyntax(ts, alloc, str->latin1Chars(nogc), str->length(), unicode)
: ::ParsePatternSyntax(ts, alloc, str->twoByteChars(nogc), str->length()); : ::ParsePatternSyntax(ts, alloc, str->twoByteChars(nogc), str->length(), unicode);
} }

View File

@ -43,11 +43,12 @@ namespace irregexp {
bool bool
ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str, ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
bool multiline, bool match_only, bool multiline, bool match_only, bool unicode,
RegExpCompileData* data); RegExpCompileData* data);
bool bool
ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str); ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
bool unicode);
// A BufferedVector is an automatically growing list, just like (and backed // A BufferedVector is an automatically growing list, just like (and backed
// by) a Vector, that is optimized for the case of adding and removing // by) a Vector, that is optimized for the case of adding and removing
@ -174,7 +175,7 @@ class RegExpParser
{ {
public: public:
RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc, RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
const CharT* chars, const CharT* end, bool multiline_mode); const CharT* chars, const CharT* end, bool multiline_mode, bool unicode);
RegExpTree* ParsePattern(); RegExpTree* ParsePattern();
RegExpTree* ParseDisjunction(); RegExpTree* ParseDisjunction();
@ -288,6 +289,7 @@ class RegExpParser
int capture_count_; int capture_count_;
bool has_more_; bool has_more_;
bool multiline_; bool multiline_;
bool unicode_;
bool simple_; bool simple_;
bool contains_anchor_; bool contains_anchor_;
bool is_scanned_for_captures_; bool is_scanned_for_captures_;

View File

@ -4959,6 +4959,7 @@ JS_ObjectIsDate(JSContext* cx, JS::HandleObject obj, bool* isDate);
#define JSREG_GLOB 0x02u /* global exec, creates array of matches */ #define JSREG_GLOB 0x02u /* global exec, creates array of matches */
#define JSREG_MULTILINE 0x04u /* treat ^ and $ as begin and end of line */ #define JSREG_MULTILINE 0x04u /* treat ^ and $ as begin and end of line */
#define JSREG_STICKY 0x08u /* only match starting at lastIndex */ #define JSREG_STICKY 0x08u /* only match starting at lastIndex */
#define JSREG_UNICODE 0x10u /* unicode ('u' flag); must equal UnicodeFlag */
extern JS_PUBLIC_API(JSObject*) extern JS_PUBLIC_API(JSObject*)
JS_NewRegExpObject(JSContext* cx, JS::HandleObject obj, const char* bytes, size_t length, JS_NewRegExpObject(JSContext* cx, JS::HandleObject obj, const char* bytes, size_t length,

View File

@ -10,7 +10,7 @@ var getters = [
"multiline", "multiline",
"source", "source",
"sticky", "sticky",
//"unicode", "unicode",
]; ];
for (var name of getters) { for (var name of getters) {
@ -21,9 +21,5 @@ for (var name of getters) {
assertEq("get" in desc, true); assertEq("get" in desc, true);
} }
// When the /u flag is supported, remove this comment and the next line, and
// uncomment "unicode" in |props| above.
assertThrowsInstanceOf(() => RegExp("", "mygui").flags, SyntaxError);
if (typeof reportCompare === "function") if (typeof reportCompare === "function")
reportCompare(true, true); reportCompare(true, true);

View File

@ -8,17 +8,14 @@ var props = [
"ignoreCase", "ignoreCase",
"multiline", "multiline",
"sticky", "sticky",
//"unicode", "unicode",
]; ];
testThrows(RegExp.prototype); testThrows(RegExp.prototype);
test(/foo/iymg, [true, true, true, true, false]); test(/foo/iymg, [true, true, true, true, false]);
test(RegExp(""), [false, false, false, false, false]); test(RegExp(""), [false, false, false, false, false]);
test(RegExp("", "mygi"), [true, true, true, true, false]); test(RegExp("", "mygi"), [true, true, true, true, false]);
// When the /u flag is supported, remove the following line, uncomment the test(RegExp("", "mygiu"), [true, true, true, true, true]);
// next line, and uncomment "unicode" in |props| above.
assertThrowsInstanceOf(() => RegExp("", "mygui").flags, SyntaxError);
// test(RegExp("", "mygiu"), [true, true, true, true, true]);
testThrowsGeneric(); testThrowsGeneric();
testThrowsGeneric(1); testThrowsGeneric(1);

View File

@ -7,16 +7,12 @@ assertEq(RegExp.prototype.flags, "");
assertEq(/foo/iymg.flags, "gimy"); assertEq(/foo/iymg.flags, "gimy");
assertEq(RegExp("").flags, ""); assertEq(RegExp("").flags, "");
assertEq(RegExp("", "mygi").flags, "gimy"); assertEq(RegExp("", "mygi").flags, "gimy");
// TODO: Uncomment lines 12, 16, 19 and remove lines 11, 15, 18 when bug 1135377 is fixed. assertEq(RegExp("", "mygui").flags, "gimuy");
assertThrowsInstanceOf(() => RegExp("", "mygui").flags, SyntaxError);
// assertEq(RegExp("", "mygui").flags, "gimuy");
assertEq(genericFlags({}), ""); assertEq(genericFlags({}), "");
assertEq(genericFlags({ignoreCase: true}), "i"); assertEq(genericFlags({ignoreCase: true}), "i");
assertEq(genericFlags({sticky:1, unicode:1, global: 0}), "y"); assertEq(genericFlags({sticky:1, unicode:1, global: 0}), "uy");
// assertEq(genericFlags({sticky:1, unicode:1, global: 0}), "uy");
assertEq(genericFlags({__proto__: {multiline: true}}), "m"); assertEq(genericFlags({__proto__: {multiline: true}}), "m");
assertEq(genericFlags(new Proxy({}, {get(){return true}})), "gimy"); assertEq(genericFlags(new Proxy({}, {get(){return true}})), "gimuy");
// assertEq(genericFlags(new Proxy({}, {get(){return true}})), "gimuy");
assertThrowsInstanceOf(() => genericFlags(), TypeError); assertThrowsInstanceOf(() => genericFlags(), TypeError);
assertThrowsInstanceOf(() => genericFlags(1), TypeError); assertThrowsInstanceOf(() => genericFlags(1), TypeError);

View File

@ -39,6 +39,7 @@ JS_STATIC_ASSERT(IgnoreCaseFlag == JSREG_FOLD);
JS_STATIC_ASSERT(GlobalFlag == JSREG_GLOB); JS_STATIC_ASSERT(GlobalFlag == JSREG_GLOB);
JS_STATIC_ASSERT(MultilineFlag == JSREG_MULTILINE); JS_STATIC_ASSERT(MultilineFlag == JSREG_MULTILINE);
JS_STATIC_ASSERT(StickyFlag == JSREG_STICKY); JS_STATIC_ASSERT(StickyFlag == JSREG_STICKY);
JS_STATIC_ASSERT(UnicodeFlag == JSREG_UNICODE);
RegExpObject* RegExpObject*
js::RegExpAlloc(ExclusiveContext* cx, HandleObject proto /* = nullptr */) js::RegExpAlloc(ExclusiveContext* cx, HandleObject proto /* = nullptr */)
@ -219,7 +220,7 @@ RegExpObject::createNoStatics(ExclusiveContext* cx, HandleAtom source, RegExpFla
tokenStream = dummyTokenStream.ptr(); tokenStream = dummyTokenStream.ptr();
} }
if (!irregexp::ParsePatternSyntax(*tokenStream, alloc, source)) if (!irregexp::ParsePatternSyntax(*tokenStream, alloc, source, flags & UnicodeFlag))
return nullptr; return nullptr;
Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx)); Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx));
@ -267,6 +268,7 @@ RegExpObject::initIgnoringLastIndex(HandleAtom source, RegExpFlag flags)
setIgnoreCase(flags & IgnoreCaseFlag); setIgnoreCase(flags & IgnoreCaseFlag);
setMultiline(flags & MultilineFlag); setMultiline(flags & MultilineFlag);
setSticky(flags & StickyFlag); setSticky(flags & StickyFlag);
setUnicode(flags & UnicodeFlag);
} }
void void
@ -455,6 +457,8 @@ RegExpObject::toString(JSContext* cx) const
return nullptr; return nullptr;
if (multiline() && !sb.append('m')) if (multiline() && !sb.append('m'))
return nullptr; return nullptr;
if (unicode() && !sb.append('u'))
return nullptr;
if (sticky() && !sb.append('y')) if (sticky() && !sb.append('y'))
return nullptr; return nullptr;
@ -515,7 +519,7 @@ RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString inpu
/* Parse the pattern. */ /* Parse the pattern. */
irregexp::RegExpCompileData data; irregexp::RegExpCompileData data;
if (!irregexp::ParsePattern(dummyTokenStream, cx->tempLifoAlloc(), pattern, if (!irregexp::ParsePattern(dummyTokenStream, cx->tempLifoAlloc(), pattern,
multiline(), mode == MatchOnly, &data)) multiline(), mode == MatchOnly, unicode(), &data))
{ {
return false; return false;
} }
@ -949,6 +953,10 @@ ParseRegExpFlags(const CharT* chars, size_t length, RegExpFlag* flagsOut, char16
if (!HandleRegExpFlag(StickyFlag, flagsOut)) if (!HandleRegExpFlag(StickyFlag, flagsOut))
return false; return false;
break; break;
case 'u':
if (!HandleRegExpFlag(UnicodeFlag, flagsOut))
return false;
break;
default: default:
return false; return false;
} }

View File

@ -51,9 +51,10 @@ enum RegExpFlag
GlobalFlag = 0x02, GlobalFlag = 0x02,
MultilineFlag = 0x04, MultilineFlag = 0x04,
StickyFlag = 0x08, StickyFlag = 0x08,
UnicodeFlag = 0x10,
NoFlags = 0x00, NoFlags = 0x00,
AllFlags = 0x0f AllFlags = 0x1f
}; };
enum RegExpRunStatus enum RegExpRunStatus
@ -186,6 +187,7 @@ class RegExpShared
bool global() const { return flags & GlobalFlag; } bool global() const { return flags & GlobalFlag; }
bool multiline() const { return flags & MultilineFlag; } bool multiline() const { return flags & MultilineFlag; }
bool sticky() const { return flags & StickyFlag; } bool sticky() const { return flags & StickyFlag; }
bool unicode() const { return flags & UnicodeFlag; }
bool isCompiled(CompilationMode mode, bool latin1, bool isCompiled(CompilationMode mode, bool latin1,
ForceByteCodeEnum force = DontForceByteCode) const { ForceByteCodeEnum force = DontForceByteCode) const {
@ -340,9 +342,10 @@ class RegExpObject : public NativeObject
static const unsigned IGNORE_CASE_FLAG_SLOT = 3; static const unsigned IGNORE_CASE_FLAG_SLOT = 3;
static const unsigned MULTILINE_FLAG_SLOT = 4; static const unsigned MULTILINE_FLAG_SLOT = 4;
static const unsigned STICKY_FLAG_SLOT = 5; static const unsigned STICKY_FLAG_SLOT = 5;
static const unsigned UNICODE_FLAG_SLOT = 6;
public: public:
static const unsigned RESERVED_SLOTS = 6; static const unsigned RESERVED_SLOTS = 7;
static const unsigned PRIVATE_SLOT = 7; static const unsigned PRIVATE_SLOT = 7;
static const Class class_; static const Class class_;
@ -407,6 +410,7 @@ class RegExpObject : public NativeObject
flags |= ignoreCase() ? IgnoreCaseFlag : 0; flags |= ignoreCase() ? IgnoreCaseFlag : 0;
flags |= multiline() ? MultilineFlag : 0; flags |= multiline() ? MultilineFlag : 0;
flags |= sticky() ? StickyFlag : 0; flags |= sticky() ? StickyFlag : 0;
flags |= unicode() ? UnicodeFlag : 0;
return RegExpFlag(flags); return RegExpFlag(flags);
} }
@ -432,10 +436,15 @@ class RegExpObject : public NativeObject
setSlot(STICKY_FLAG_SLOT, BooleanValue(enabled)); setSlot(STICKY_FLAG_SLOT, BooleanValue(enabled));
} }
// Record the unicode flag as a boolean value in UNICODE_FLAG_SLOT.
void setUnicode(bool enabled) {
setSlot(UNICODE_FLAG_SLOT, BooleanValue(enabled));
}
bool ignoreCase() const { return getFixedSlot(IGNORE_CASE_FLAG_SLOT).toBoolean(); } bool ignoreCase() const { return getFixedSlot(IGNORE_CASE_FLAG_SLOT).toBoolean(); }
bool global() const { return getFixedSlot(GLOBAL_FLAG_SLOT).toBoolean(); } bool global() const { return getFixedSlot(GLOBAL_FLAG_SLOT).toBoolean(); }
bool multiline() const { return getFixedSlot(MULTILINE_FLAG_SLOT).toBoolean(); } bool multiline() const { return getFixedSlot(MULTILINE_FLAG_SLOT).toBoolean(); }
bool sticky() const { return getFixedSlot(STICKY_FLAG_SLOT).toBoolean(); } bool sticky() const { return getFixedSlot(STICKY_FLAG_SLOT).toBoolean(); }
bool unicode() const { return getFixedSlot(UNICODE_FLAG_SLOT).toBoolean(); }
bool getShared(JSContext* cx, RegExpGuard* g); bool getShared(JSContext* cx, RegExpGuard* g);

View File

@ -198,7 +198,7 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=933681
gPrototypeProperties['RegExp'] = gPrototypeProperties['RegExp'] =
["constructor", "toSource", "toString", "compile", "exec", "test", ["constructor", "toSource", "toString", "compile", "exec", "test",
"flags", "global", "ignoreCase", "multiline", "source", "sticky", "flags", "global", "ignoreCase", "multiline", "source", "sticky", "unicode",
"lastIndex"]; "lastIndex"];
// Sort an array that may contain symbols as well as strings. // Sort an array that may contain symbols as well as strings.
@ -612,7 +612,7 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=933681
// Test with modified flags accessors // Test with modified flags accessors
iwin.eval(` iwin.eval(`
var props = ["global", "ignoreCase", "multiline", "sticky", "source"]; var props = ["global", "ignoreCase", "multiline", "sticky", "source", "unicode"];
var origDescs = {}; var origDescs = {};
for (var prop of props) { for (var prop of props) {
origDescs[prop] = Object.getOwnPropertyDescriptor(RegExp.prototype, prop); origDescs[prop] = Object.getOwnPropertyDescriptor(RegExp.prototype, prop);