mirror of
https://github.com/classilla/tenfourfox.git
synced 2024-09-28 20:56:36 +00:00
#393, Bug 1135377 - Part 1: Implement RegExp unicode flag. r=till, f=anba
This commit is contained in:
parent
ac6710b0fd
commit
d00063089b
@ -176,8 +176,11 @@ RegExpInitializeIgnoringLastIndex(JSContext* cx, Handle<RegExpObject*> obj,
|
||||
/* Steps 8-10. */
|
||||
CompileOptions options(cx);
|
||||
frontend::TokenStream dummyTokenStream(cx, options, nullptr, 0, nullptr);
|
||||
if (!irregexp::ParsePatternSyntax(dummyTokenStream, cx->tempLifoAlloc(), pattern))
|
||||
if (!irregexp::ParsePatternSyntax(dummyTokenStream, cx->tempLifoAlloc(), pattern,
|
||||
flags & UnicodeFlag))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (staticsUse == UseRegExpStatics) {
|
||||
RegExpStatics* res = cx->global()->getRegExpStatics(cx);
|
||||
@ -561,6 +564,24 @@ regexp_sticky(JSContext* cx, unsigned argc, JS::Value* vp)
|
||||
return CallNonGenericMethod<IsRegExpObject, regexp_sticky_impl>(cx, args);
|
||||
}
|
||||
|
||||
/*
 * ES6 21.2.5.15: implementation half of the RegExp.prototype.unicode getter.
 *
 * Precondition (asserted below): args.thisv() is a RegExp object.
 * Stores the object's unicode() flag in args.rval() as a boolean and
 * always returns true.
 */
|
||||
MOZ_ALWAYS_INLINE bool
|
||||
regexp_unicode_impl(JSContext* cx, const CallArgs& args)
|
||||
{
|
||||
MOZ_ASSERT(IsRegExpObject(args.thisv()));
|
||||
/* Steps 4-6. */
|
||||
args.rval().setBoolean(args.thisv().toObject().as<RegExpObject>().unicode());
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Native entry point for the "unicode" accessor listed in
 * js::regexp_properties. Builds CallArgs from (argc, vp) and dispatches
 * through CallNonGenericMethod with IsRegExpObject as the |this| test and
 * regexp_unicode_impl as the implementation; its result is returned as-is.
 */
static bool
|
||||
regexp_unicode(JSContext* cx, unsigned argc, JS::Value* vp)
|
||||
{
|
||||
/* Steps 1-3. */
|
||||
CallArgs args = CallArgsFromVp(argc, vp);
|
||||
return CallNonGenericMethod<IsRegExpObject, regexp_unicode_impl>(cx, args);
|
||||
}
|
||||
|
||||
const JSPropertySpec js::regexp_properties[] = {
|
||||
JS_SELF_HOSTED_GET("flags", "RegExpFlagsGetter", 0),
|
||||
JS_PSG("global", regexp_global, 0),
|
||||
@ -568,6 +589,7 @@ const JSPropertySpec js::regexp_properties[] = {
|
||||
JS_PSG("multiline", regexp_multiline, 0),
|
||||
JS_PSG("source", regexp_source, 0),
|
||||
JS_PSG("sticky", regexp_sticky, 0),
|
||||
JS_PSG("unicode", regexp_unicode, 0),
|
||||
JS_PS_END
|
||||
};
|
||||
|
||||
|
@ -25,9 +25,8 @@ function RegExpFlagsGetter() {
|
||||
result += "m";
|
||||
|
||||
// Steps 13-15.
|
||||
// TODO: Uncomment these steps when bug 1135377 is fixed.
|
||||
// if (R.unicode)
|
||||
// result += "u";
|
||||
if (R.unicode)
|
||||
result += "u";
|
||||
|
||||
// Steps 16-18.
|
||||
if (R.sticky)
|
||||
|
@ -1582,6 +1582,8 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
|
||||
reflags = RegExpFlag(reflags | MultilineFlag);
|
||||
else if (c == 'y' && !(reflags & StickyFlag))
|
||||
reflags = RegExpFlag(reflags | StickyFlag);
|
||||
else if (c == 'u' && !(reflags & UnicodeFlag))
|
||||
reflags = RegExpFlag(reflags | UnicodeFlag);
|
||||
else
|
||||
break;
|
||||
getChar();
|
||||
|
@ -205,7 +205,8 @@ RegExpBuilder::AddQuantifierToAtom(int min, int max,
|
||||
|
||||
template <typename CharT>
|
||||
RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
|
||||
const CharT* chars, const CharT* end, bool multiline_mode)
|
||||
const CharT* chars, const CharT* end, bool multiline_mode,
|
||||
bool unicode)
|
||||
: ts(ts),
|
||||
alloc(alloc),
|
||||
captures_(nullptr),
|
||||
@ -215,6 +216,7 @@ RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
|
||||
capture_count_(0),
|
||||
has_more_(true),
|
||||
multiline_(multiline_mode),
|
||||
unicode_(unicode),
|
||||
simple_(false),
|
||||
contains_anchor_(false),
|
||||
is_scanned_for_captures_(false)
|
||||
@ -1002,7 +1004,7 @@ template class irregexp::RegExpParser<char16_t>;
|
||||
template <typename CharT>
|
||||
static bool
|
||||
ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length,
|
||||
bool multiline, bool match_only, RegExpCompileData* data)
|
||||
bool multiline, bool match_only, bool unicode, RegExpCompileData* data)
|
||||
{
|
||||
if (match_only) {
|
||||
// Try to strip a leading '.*' from the RegExp, but only if it is not
|
||||
@ -1025,7 +1027,7 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si
|
||||
}
|
||||
}
|
||||
|
||||
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, multiline);
|
||||
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, multiline, unicode);
|
||||
data->tree = parser.ParsePattern();
|
||||
if (!data->tree)
|
||||
return false;
|
||||
@ -1038,32 +1040,34 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si
|
||||
|
||||
bool
|
||||
irregexp::ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
|
||||
bool multiline, bool match_only,
|
||||
bool multiline, bool match_only, bool unicode,
|
||||
RegExpCompileData* data)
|
||||
{
|
||||
JS::AutoCheckCannotGC nogc;
|
||||
return str->hasLatin1Chars()
|
||||
? ::ParsePattern(ts, alloc, str->latin1Chars(nogc), str->length(),
|
||||
multiline, match_only, data)
|
||||
multiline, match_only, unicode, data)
|
||||
: ::ParsePattern(ts, alloc, str->twoByteChars(nogc), str->length(),
|
||||
multiline, match_only, data);
|
||||
multiline, match_only, unicode, data);
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
static bool
|
||||
ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length)
|
||||
ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length,
|
||||
bool unicode)
|
||||
{
|
||||
LifoAllocScope scope(&alloc);
|
||||
|
||||
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, false);
|
||||
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, false, unicode);
|
||||
return parser.ParsePattern() != nullptr;
|
||||
}
|
||||
|
||||
bool
|
||||
irregexp::ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str)
|
||||
irregexp::ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
|
||||
bool unicode)
|
||||
{
|
||||
JS::AutoCheckCannotGC nogc;
|
||||
return str->hasLatin1Chars()
|
||||
? ::ParsePatternSyntax(ts, alloc, str->latin1Chars(nogc), str->length())
|
||||
: ::ParsePatternSyntax(ts, alloc, str->twoByteChars(nogc), str->length());
|
||||
? ::ParsePatternSyntax(ts, alloc, str->latin1Chars(nogc), str->length(), unicode)
|
||||
: ::ParsePatternSyntax(ts, alloc, str->twoByteChars(nogc), str->length(), unicode);
|
||||
}
|
||||
|
@ -43,11 +43,12 @@ namespace irregexp {
|
||||
|
||||
bool
|
||||
ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
|
||||
bool multiline, bool match_only,
|
||||
bool multiline, bool match_only, bool unicode,
|
||||
RegExpCompileData* data);
|
||||
|
||||
bool
|
||||
ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str);
|
||||
ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
|
||||
bool unicode);
|
||||
|
||||
// A BufferedVector is an automatically growing list, just like (and backed
|
||||
// by) a Vector, that is optimized for the case of adding and removing
|
||||
@ -174,7 +175,7 @@ class RegExpParser
|
||||
{
|
||||
public:
|
||||
RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
|
||||
const CharT* chars, const CharT* end, bool multiline_mode);
|
||||
const CharT* chars, const CharT* end, bool multiline_mode, bool unicode);
|
||||
|
||||
RegExpTree* ParsePattern();
|
||||
RegExpTree* ParseDisjunction();
|
||||
@ -288,6 +289,7 @@ class RegExpParser
|
||||
int capture_count_;
|
||||
bool has_more_;
|
||||
bool multiline_;
|
||||
bool unicode_;
|
||||
bool simple_;
|
||||
bool contains_anchor_;
|
||||
bool is_scanned_for_captures_;
|
||||
|
@ -4959,6 +4959,7 @@ JS_ObjectIsDate(JSContext* cx, JS::HandleObject obj, bool* isDate);
|
||||
#define JSREG_GLOB 0x02u /* global exec, creates array of matches */
|
||||
#define JSREG_MULTILINE 0x04u /* treat ^ and $ as begin and end of line */
|
||||
#define JSREG_STICKY 0x08u /* only match starting at lastIndex */
|
||||
#define JSREG_UNICODE 0x10u /* unicode: set by the 'u' RegExp flag; must equal UnicodeFlag */
|
||||
|
||||
extern JS_PUBLIC_API(JSObject*)
|
||||
JS_NewRegExpObject(JSContext* cx, JS::HandleObject obj, const char* bytes, size_t length,
|
||||
|
@ -10,7 +10,7 @@ var getters = [
|
||||
"multiline",
|
||||
"source",
|
||||
"sticky",
|
||||
//"unicode",
|
||||
"unicode",
|
||||
];
|
||||
|
||||
for (var name of getters) {
|
||||
@ -21,9 +21,5 @@ for (var name of getters) {
|
||||
assertEq("get" in desc, true);
|
||||
}
|
||||
|
||||
// When the /u flag is supported, remove this comment and the next line, and
|
||||
// uncomment "unicode" in |props| above.
|
||||
assertThrowsInstanceOf(() => RegExp("", "mygui").flags, SyntaxError);
|
||||
|
||||
if (typeof reportCompare === "function")
|
||||
reportCompare(true, true);
|
||||
|
@ -8,17 +8,14 @@ var props = [
|
||||
"ignoreCase",
|
||||
"multiline",
|
||||
"sticky",
|
||||
//"unicode",
|
||||
"unicode",
|
||||
];
|
||||
|
||||
testThrows(RegExp.prototype);
|
||||
test(/foo/iymg, [true, true, true, true, false]);
|
||||
test(RegExp(""), [false, false, false, false, false]);
|
||||
test(RegExp("", "mygi"), [true, true, true, true, false]);
|
||||
// When the /u flag is supported, remove the following line, uncomment the
|
||||
// next line, and uncomment "unicode" in |props| above.
|
||||
assertThrowsInstanceOf(() => RegExp("", "mygui").flags, SyntaxError);
|
||||
// test(RegExp("", "mygiu"), [true, true, true, true, true]);
|
||||
test(RegExp("", "mygiu"), [true, true, true, true, true]);
|
||||
|
||||
testThrowsGeneric();
|
||||
testThrowsGeneric(1);
|
||||
|
@ -7,16 +7,12 @@ assertEq(RegExp.prototype.flags, "");
|
||||
assertEq(/foo/iymg.flags, "gimy");
|
||||
assertEq(RegExp("").flags, "");
|
||||
assertEq(RegExp("", "mygi").flags, "gimy");
|
||||
// TODO: Uncomment lines 12, 16, 19 and remove lines 11, 15, 18 when bug 1135377 is fixed.
|
||||
assertThrowsInstanceOf(() => RegExp("", "mygui").flags, SyntaxError);
|
||||
// assertEq(RegExp("", "mygui").flags, "gimuy");
|
||||
assertEq(RegExp("", "mygui").flags, "gimuy");
|
||||
assertEq(genericFlags({}), "");
|
||||
assertEq(genericFlags({ignoreCase: true}), "i");
|
||||
assertEq(genericFlags({sticky:1, unicode:1, global: 0}), "y");
|
||||
// assertEq(genericFlags({sticky:1, unicode:1, global: 0}), "uy");
|
||||
assertEq(genericFlags({sticky:1, unicode:1, global: 0}), "uy");
|
||||
assertEq(genericFlags({__proto__: {multiline: true}}), "m");
|
||||
assertEq(genericFlags(new Proxy({}, {get(){return true}})), "gimy");
|
||||
// assertEq(genericFlags(new Proxy({}, {get(){return true}})), "gimuy");
|
||||
assertEq(genericFlags(new Proxy({}, {get(){return true}})), "gimuy");
|
||||
|
||||
assertThrowsInstanceOf(() => genericFlags(), TypeError);
|
||||
assertThrowsInstanceOf(() => genericFlags(1), TypeError);
|
||||
|
@ -39,6 +39,7 @@ JS_STATIC_ASSERT(IgnoreCaseFlag == JSREG_FOLD);
|
||||
JS_STATIC_ASSERT(GlobalFlag == JSREG_GLOB);
|
||||
JS_STATIC_ASSERT(MultilineFlag == JSREG_MULTILINE);
|
||||
JS_STATIC_ASSERT(StickyFlag == JSREG_STICKY);
|
||||
JS_STATIC_ASSERT(UnicodeFlag == JSREG_UNICODE);
|
||||
|
||||
RegExpObject*
|
||||
js::RegExpAlloc(ExclusiveContext* cx, HandleObject proto /* = nullptr */)
|
||||
@ -219,7 +220,7 @@ RegExpObject::createNoStatics(ExclusiveContext* cx, HandleAtom source, RegExpFla
|
||||
tokenStream = dummyTokenStream.ptr();
|
||||
}
|
||||
|
||||
if (!irregexp::ParsePatternSyntax(*tokenStream, alloc, source))
|
||||
if (!irregexp::ParsePatternSyntax(*tokenStream, alloc, source, flags & UnicodeFlag))
|
||||
return nullptr;
|
||||
|
||||
Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx));
|
||||
@ -267,6 +268,7 @@ RegExpObject::initIgnoringLastIndex(HandleAtom source, RegExpFlag flags)
|
||||
setIgnoreCase(flags & IgnoreCaseFlag);
|
||||
setMultiline(flags & MultilineFlag);
|
||||
setSticky(flags & StickyFlag);
|
||||
setUnicode(flags & UnicodeFlag);
|
||||
}
|
||||
|
||||
void
|
||||
@ -455,6 +457,8 @@ RegExpObject::toString(JSContext* cx) const
|
||||
return nullptr;
|
||||
if (multiline() && !sb.append('m'))
|
||||
return nullptr;
|
||||
if (unicode() && !sb.append('u'))
|
||||
return nullptr;
|
||||
if (sticky() && !sb.append('y'))
|
||||
return nullptr;
|
||||
|
||||
@ -515,7 +519,7 @@ RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString inpu
|
||||
/* Parse the pattern. */
|
||||
irregexp::RegExpCompileData data;
|
||||
if (!irregexp::ParsePattern(dummyTokenStream, cx->tempLifoAlloc(), pattern,
|
||||
multiline(), mode == MatchOnly, &data))
|
||||
multiline(), mode == MatchOnly, unicode(), &data))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
@ -949,6 +953,10 @@ ParseRegExpFlags(const CharT* chars, size_t length, RegExpFlag* flagsOut, char16
|
||||
if (!HandleRegExpFlag(StickyFlag, flagsOut))
|
||||
return false;
|
||||
break;
|
||||
case 'u':
|
||||
if (!HandleRegExpFlag(UnicodeFlag, flagsOut))
|
||||
return false;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
@ -51,9 +51,10 @@ enum RegExpFlag
|
||||
GlobalFlag = 0x02,
|
||||
MultilineFlag = 0x04,
|
||||
StickyFlag = 0x08,
|
||||
UnicodeFlag = 0x10,
|
||||
|
||||
NoFlags = 0x00,
|
||||
AllFlags = 0x0f
|
||||
AllFlags = 0x1f
|
||||
};
|
||||
|
||||
enum RegExpRunStatus
|
||||
@ -186,6 +187,7 @@ class RegExpShared
|
||||
bool global() const { return flags & GlobalFlag; }
|
||||
bool multiline() const { return flags & MultilineFlag; }
|
||||
bool sticky() const { return flags & StickyFlag; }
|
||||
// True when UnicodeFlag is set in this object's |flags|.
bool unicode() const { return flags & UnicodeFlag; }
|
||||
|
||||
bool isCompiled(CompilationMode mode, bool latin1,
|
||||
ForceByteCodeEnum force = DontForceByteCode) const {
|
||||
@ -340,9 +342,10 @@ class RegExpObject : public NativeObject
|
||||
static const unsigned IGNORE_CASE_FLAG_SLOT = 3;
|
||||
static const unsigned MULTILINE_FLAG_SLOT = 4;
|
||||
static const unsigned STICKY_FLAG_SLOT = 5;
|
||||
static const unsigned UNICODE_FLAG_SLOT = 6;
|
||||
|
||||
public:
|
||||
static const unsigned RESERVED_SLOTS = 6;
|
||||
static const unsigned RESERVED_SLOTS = 7;
|
||||
static const unsigned PRIVATE_SLOT = 7;
|
||||
|
||||
static const Class class_;
|
||||
@ -407,6 +410,7 @@ class RegExpObject : public NativeObject
|
||||
flags |= ignoreCase() ? IgnoreCaseFlag : 0;
|
||||
flags |= multiline() ? MultilineFlag : 0;
|
||||
flags |= sticky() ? StickyFlag : 0;
|
||||
flags |= unicode() ? UnicodeFlag : 0;
|
||||
return RegExpFlag(flags);
|
||||
}
|
||||
|
||||
@ -432,10 +436,15 @@ class RegExpObject : public NativeObject
|
||||
setSlot(STICKY_FLAG_SLOT, BooleanValue(enabled));
|
||||
}
|
||||
|
||||
// Records the unicode flag by storing BooleanValue(enabled) in
// UNICODE_FLAG_SLOT.
void setUnicode(bool enabled) {
|
||||
setSlot(UNICODE_FLAG_SLOT, BooleanValue(enabled));
|
||||
}
|
||||
|
||||
bool ignoreCase() const { return getFixedSlot(IGNORE_CASE_FLAG_SLOT).toBoolean(); }
|
||||
bool global() const { return getFixedSlot(GLOBAL_FLAG_SLOT).toBoolean(); }
|
||||
bool multiline() const { return getFixedSlot(MULTILINE_FLAG_SLOT).toBoolean(); }
|
||||
bool sticky() const { return getFixedSlot(STICKY_FLAG_SLOT).toBoolean(); }
|
||||
// Reads the unicode flag back from UNICODE_FLAG_SLOT; the slot is expected
// to hold a boolean (toBoolean() is applied without a type check here).
bool unicode() const { return getFixedSlot(UNICODE_FLAG_SLOT).toBoolean(); }
|
||||
|
||||
bool getShared(JSContext* cx, RegExpGuard* g);
|
||||
|
||||
|
@ -198,7 +198,7 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=933681
|
||||
|
||||
gPrototypeProperties['RegExp'] =
|
||||
["constructor", "toSource", "toString", "compile", "exec", "test",
|
||||
"flags", "global", "ignoreCase", "multiline", "source", "sticky",
|
||||
"flags", "global", "ignoreCase", "multiline", "source", "sticky", "unicode",
|
||||
"lastIndex"];
|
||||
|
||||
// Sort an array that may contain symbols as well as strings.
|
||||
@ -612,7 +612,7 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=933681
|
||||
|
||||
// Test with modified flags accessors
|
||||
iwin.eval(`
|
||||
var props = ["global", "ignoreCase", "multiline", "sticky", "source"];
|
||||
var props = ["global", "ignoreCase", "multiline", "sticky", "source", "unicode"];
|
||||
var origDescs = {};
|
||||
for (var prop of props) {
|
||||
origDescs[prop] = Object.getOwnPropertyDescriptor(RegExp.prototype, prop);
|
||||
|
Loading…
Reference in New Issue
Block a user