#393, Bug 1135377 - Part 1: Implement RegExp unicode flag. r=till, f=anba

This commit is contained in:
Tooru Fujisawa 2015-08-07 08:10:05 +09:00 committed by Cameron Kaiser
parent ac6710b0fd
commit d00063089b
12 changed files with 77 additions and 41 deletions

View File

@ -176,8 +176,11 @@ RegExpInitializeIgnoringLastIndex(JSContext* cx, Handle<RegExpObject*> obj,
/* Steps 8-10. */
CompileOptions options(cx);
frontend::TokenStream dummyTokenStream(cx, options, nullptr, 0, nullptr);
if (!irregexp::ParsePatternSyntax(dummyTokenStream, cx->tempLifoAlloc(), pattern))
if (!irregexp::ParsePatternSyntax(dummyTokenStream, cx->tempLifoAlloc(), pattern,
flags & UnicodeFlag))
{
return false;
}
if (staticsUse == UseRegExpStatics) {
RegExpStatics* res = cx->global()->getRegExpStatics(cx);
@ -561,6 +564,24 @@ regexp_sticky(JSContext* cx, unsigned argc, JS::Value* vp)
return CallNonGenericMethod<IsRegExpObject, regexp_sticky_impl>(cx, args);
}
/*
 * ES6 21.2.5.15: body of the unicode getter. Asserts that |this| is a
 * RegExpObject and stores that object's unicode flag in the return value.
 */
MOZ_ALWAYS_INLINE bool
regexp_unicode_impl(JSContext* cx, const CallArgs& args)
{
    MOZ_ASSERT(IsRegExpObject(args.thisv()));

    /* Steps 4-6: read the flag off the RegExpObject and return it as a boolean. */
    RegExpObject& regexp = args.thisv().toObject().as<RegExpObject>();
    args.rval().setBoolean(regexp.unicode());
    return true;
}
/*
 * Native entry point for the unicode getter. Builds the CallArgs view of the
 * invocation and dispatches to regexp_unicode_impl via CallNonGenericMethod,
 * using IsRegExpObject as the |this| test.
 */
static bool
regexp_unicode(JSContext* cx, unsigned argc, JS::Value* vp)
{
    /* Steps 1-3. */
    CallArgs callArgs = CallArgsFromVp(argc, vp);
    return CallNonGenericMethod<IsRegExpObject, regexp_unicode_impl>(cx, callArgs);
}
const JSPropertySpec js::regexp_properties[] = {
JS_SELF_HOSTED_GET("flags", "RegExpFlagsGetter", 0),
JS_PSG("global", regexp_global, 0),
@ -568,6 +589,7 @@ const JSPropertySpec js::regexp_properties[] = {
JS_PSG("multiline", regexp_multiline, 0),
JS_PSG("source", regexp_source, 0),
JS_PSG("sticky", regexp_sticky, 0),
JS_PSG("unicode", regexp_unicode, 0),
JS_PS_END
};

View File

@ -25,9 +25,8 @@ function RegExpFlagsGetter() {
result += "m";
// Steps 13-15.
// TODO: Uncomment these steps when bug 1135377 is fixed.
// if (R.unicode)
// result += "u";
if (R.unicode)
result += "u";
// Steps 16-18.
if (R.sticky)

View File

@ -1582,6 +1582,8 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
reflags = RegExpFlag(reflags | MultilineFlag);
else if (c == 'y' && !(reflags & StickyFlag))
reflags = RegExpFlag(reflags | StickyFlag);
else if (c == 'u' && !(reflags & UnicodeFlag))
reflags = RegExpFlag(reflags | UnicodeFlag);
else
break;
getChar();

View File

@ -205,7 +205,8 @@ RegExpBuilder::AddQuantifierToAtom(int min, int max,
template <typename CharT>
RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
const CharT* chars, const CharT* end, bool multiline_mode)
const CharT* chars, const CharT* end, bool multiline_mode,
bool unicode)
: ts(ts),
alloc(alloc),
captures_(nullptr),
@ -215,6 +216,7 @@ RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
capture_count_(0),
has_more_(true),
multiline_(multiline_mode),
unicode_(unicode),
simple_(false),
contains_anchor_(false),
is_scanned_for_captures_(false)
@ -1002,7 +1004,7 @@ template class irregexp::RegExpParser<char16_t>;
template <typename CharT>
static bool
ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length,
bool multiline, bool match_only, RegExpCompileData* data)
bool multiline, bool match_only, bool unicode, RegExpCompileData* data)
{
if (match_only) {
// Try to strip a leading '.*' from the RegExp, but only if it is not
@ -1025,7 +1027,7 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si
}
}
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, multiline);
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, multiline, unicode);
data->tree = parser.ParsePattern();
if (!data->tree)
return false;
@ -1038,32 +1040,34 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si
bool
irregexp::ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
bool multiline, bool match_only,
bool multiline, bool match_only, bool unicode,
RegExpCompileData* data)
{
JS::AutoCheckCannotGC nogc;
return str->hasLatin1Chars()
? ::ParsePattern(ts, alloc, str->latin1Chars(nogc), str->length(),
multiline, match_only, data)
multiline, match_only, unicode, data)
: ::ParsePattern(ts, alloc, str->twoByteChars(nogc), str->length(),
multiline, match_only, data);
multiline, match_only, unicode, data);
}
template <typename CharT>
static bool
ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length)
ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length,
bool unicode)
{
LifoAllocScope scope(&alloc);
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, false);
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, false, unicode);
return parser.ParsePattern() != nullptr;
}
bool
irregexp::ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str)
irregexp::ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
bool unicode)
{
JS::AutoCheckCannotGC nogc;
return str->hasLatin1Chars()
? ::ParsePatternSyntax(ts, alloc, str->latin1Chars(nogc), str->length())
: ::ParsePatternSyntax(ts, alloc, str->twoByteChars(nogc), str->length());
? ::ParsePatternSyntax(ts, alloc, str->latin1Chars(nogc), str->length(), unicode)
: ::ParsePatternSyntax(ts, alloc, str->twoByteChars(nogc), str->length(), unicode);
}

View File

@ -43,11 +43,12 @@ namespace irregexp {
bool
ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
bool multiline, bool match_only,
bool multiline, bool match_only, bool unicode,
RegExpCompileData* data);
bool
ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str);
ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
bool unicode);
// A BufferedVector is an automatically growing list, just like (and backed
// by) a Vector, that is optimized for the case of adding and removing
@ -174,7 +175,7 @@ class RegExpParser
{
public:
RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
const CharT* chars, const CharT* end, bool multiline_mode);
const CharT* chars, const CharT* end, bool multiline_mode, bool unicode);
RegExpTree* ParsePattern();
RegExpTree* ParseDisjunction();
@ -288,6 +289,7 @@ class RegExpParser
int capture_count_;
bool has_more_;
bool multiline_;
bool unicode_;
bool simple_;
bool contains_anchor_;
bool is_scanned_for_captures_;

View File

@ -4959,6 +4959,7 @@ JS_ObjectIsDate(JSContext* cx, JS::HandleObject obj, bool* isDate);
#define JSREG_GLOB 0x02u /* global exec, creates array of matches */
#define JSREG_MULTILINE 0x04u /* treat ^ and $ as begin and end of line */
#define JSREG_STICKY 0x08u /* only match starting at lastIndex */
#define JSREG_UNICODE 0x10u /* unicode */
extern JS_PUBLIC_API(JSObject*)
JS_NewRegExpObject(JSContext* cx, JS::HandleObject obj, const char* bytes, size_t length,

View File

@ -10,7 +10,7 @@ var getters = [
"multiline",
"source",
"sticky",
//"unicode",
"unicode",
];
for (var name of getters) {
@ -21,9 +21,5 @@ for (var name of getters) {
assertEq("get" in desc, true);
}
// When the /u flag is supported, remove this comment and the next line, and
// uncomment "unicode" in |props| above.
assertThrowsInstanceOf(() => RegExp("", "mygui").flags, SyntaxError);
if (typeof reportCompare === "function")
reportCompare(true, true);

View File

@ -8,17 +8,14 @@ var props = [
"ignoreCase",
"multiline",
"sticky",
//"unicode",
"unicode",
];
testThrows(RegExp.prototype);
test(/foo/iymg, [true, true, true, true, false]);
test(RegExp(""), [false, false, false, false, false]);
test(RegExp("", "mygi"), [true, true, true, true, false]);
// When the /u flag is supported, remove the following line, uncomment the
// next line, and uncomment "unicode" in |props| above.
assertThrowsInstanceOf(() => RegExp("", "mygui").flags, SyntaxError);
// test(RegExp("", "mygiu"), [true, true, true, true, true]);
test(RegExp("", "mygiu"), [true, true, true, true, true]);
testThrowsGeneric();
testThrowsGeneric(1);

View File

@ -7,16 +7,12 @@ assertEq(RegExp.prototype.flags, "");
assertEq(/foo/iymg.flags, "gimy");
assertEq(RegExp("").flags, "");
assertEq(RegExp("", "mygi").flags, "gimy");
// TODO: Uncomment lines 12, 16, 19 and remove lines 11, 15, 18 when bug 1135377 is fixed.
assertThrowsInstanceOf(() => RegExp("", "mygui").flags, SyntaxError);
// assertEq(RegExp("", "mygui").flags, "gimuy");
assertEq(RegExp("", "mygui").flags, "gimuy");
assertEq(genericFlags({}), "");
assertEq(genericFlags({ignoreCase: true}), "i");
assertEq(genericFlags({sticky:1, unicode:1, global: 0}), "y");
// assertEq(genericFlags({sticky:1, unicode:1, global: 0}), "uy");
assertEq(genericFlags({sticky:1, unicode:1, global: 0}), "uy");
assertEq(genericFlags({__proto__: {multiline: true}}), "m");
assertEq(genericFlags(new Proxy({}, {get(){return true}})), "gimy");
// assertEq(genericFlags(new Proxy({}, {get(){return true}})), "gimuy");
assertEq(genericFlags(new Proxy({}, {get(){return true}})), "gimuy");
assertThrowsInstanceOf(() => genericFlags(), TypeError);
assertThrowsInstanceOf(() => genericFlags(1), TypeError);

View File

@ -39,6 +39,7 @@ JS_STATIC_ASSERT(IgnoreCaseFlag == JSREG_FOLD);
JS_STATIC_ASSERT(GlobalFlag == JSREG_GLOB);
JS_STATIC_ASSERT(MultilineFlag == JSREG_MULTILINE);
JS_STATIC_ASSERT(StickyFlag == JSREG_STICKY);
JS_STATIC_ASSERT(UnicodeFlag == JSREG_UNICODE);
RegExpObject*
js::RegExpAlloc(ExclusiveContext* cx, HandleObject proto /* = nullptr */)
@ -219,7 +220,7 @@ RegExpObject::createNoStatics(ExclusiveContext* cx, HandleAtom source, RegExpFla
tokenStream = dummyTokenStream.ptr();
}
if (!irregexp::ParsePatternSyntax(*tokenStream, alloc, source))
if (!irregexp::ParsePatternSyntax(*tokenStream, alloc, source, flags & UnicodeFlag))
return nullptr;
Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx));
@ -267,6 +268,7 @@ RegExpObject::initIgnoringLastIndex(HandleAtom source, RegExpFlag flags)
setIgnoreCase(flags & IgnoreCaseFlag);
setMultiline(flags & MultilineFlag);
setSticky(flags & StickyFlag);
setUnicode(flags & UnicodeFlag);
}
void
@ -455,6 +457,8 @@ RegExpObject::toString(JSContext* cx) const
return nullptr;
if (multiline() && !sb.append('m'))
return nullptr;
if (unicode() && !sb.append('u'))
return nullptr;
if (sticky() && !sb.append('y'))
return nullptr;
@ -515,7 +519,7 @@ RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString inpu
/* Parse the pattern. */
irregexp::RegExpCompileData data;
if (!irregexp::ParsePattern(dummyTokenStream, cx->tempLifoAlloc(), pattern,
multiline(), mode == MatchOnly, &data))
multiline(), mode == MatchOnly, unicode(), &data))
{
return false;
}
@ -949,6 +953,10 @@ ParseRegExpFlags(const CharT* chars, size_t length, RegExpFlag* flagsOut, char16
if (!HandleRegExpFlag(StickyFlag, flagsOut))
return false;
break;
case 'u':
if (!HandleRegExpFlag(UnicodeFlag, flagsOut))
return false;
break;
default:
return false;
}

View File

@ -51,9 +51,10 @@ enum RegExpFlag
GlobalFlag = 0x02,
MultilineFlag = 0x04,
StickyFlag = 0x08,
UnicodeFlag = 0x10,
NoFlags = 0x00,
AllFlags = 0x0f
AllFlags = 0x1f
};
enum RegExpRunStatus
@ -186,6 +187,7 @@ class RegExpShared
bool global() const { return flags & GlobalFlag; }
bool multiline() const { return flags & MultilineFlag; }
bool sticky() const { return flags & StickyFlag; }
bool unicode() const { return flags & UnicodeFlag; }
bool isCompiled(CompilationMode mode, bool latin1,
ForceByteCodeEnum force = DontForceByteCode) const {
@ -340,9 +342,10 @@ class RegExpObject : public NativeObject
static const unsigned IGNORE_CASE_FLAG_SLOT = 3;
static const unsigned MULTILINE_FLAG_SLOT = 4;
static const unsigned STICKY_FLAG_SLOT = 5;
static const unsigned UNICODE_FLAG_SLOT = 6;
public:
static const unsigned RESERVED_SLOTS = 6;
static const unsigned RESERVED_SLOTS = 7;
static const unsigned PRIVATE_SLOT = 7;
static const Class class_;
@ -407,6 +410,7 @@ class RegExpObject : public NativeObject
flags |= ignoreCase() ? IgnoreCaseFlag : 0;
flags |= multiline() ? MultilineFlag : 0;
flags |= sticky() ? StickyFlag : 0;
flags |= unicode() ? UnicodeFlag : 0;
return RegExpFlag(flags);
}
@ -432,10 +436,15 @@ class RegExpObject : public NativeObject
setSlot(STICKY_FLAG_SLOT, BooleanValue(enabled));
}
void setUnicode(bool enabled) {
setSlot(UNICODE_FLAG_SLOT, BooleanValue(enabled));
}
bool ignoreCase() const { return getFixedSlot(IGNORE_CASE_FLAG_SLOT).toBoolean(); }
bool global() const { return getFixedSlot(GLOBAL_FLAG_SLOT).toBoolean(); }
bool multiline() const { return getFixedSlot(MULTILINE_FLAG_SLOT).toBoolean(); }
bool sticky() const { return getFixedSlot(STICKY_FLAG_SLOT).toBoolean(); }
bool unicode() const { return getFixedSlot(UNICODE_FLAG_SLOT).toBoolean(); }
bool getShared(JSContext* cx, RegExpGuard* g);

View File

@ -198,7 +198,7 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=933681
gPrototypeProperties['RegExp'] =
["constructor", "toSource", "toString", "compile", "exec", "test",
"flags", "global", "ignoreCase", "multiline", "source", "sticky",
"flags", "global", "ignoreCase", "multiline", "source", "sticky", "unicode",
"lastIndex"];
// Sort an array that may contain symbols as well as strings.
@ -612,7 +612,7 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=933681
// Test with modified flags accessors
iwin.eval(`
var props = ["global", "ignoreCase", "multiline", "sticky", "source"];
var props = ["global", "ignoreCase", "multiline", "sticky", "source", "unicode"];
var origDescs = {};
for (var prop of props) {
origDescs[prop] = Object.getOwnPropertyDescriptor(RegExp.prototype, prop);