#393, Bug 1135377 - Part 10: Decrement index when it points trail surrogate that has corresponding lead surrogate. r=till, f=anba

This commit is contained in:
Tooru Fujisawa 2015-12-19 04:51:20 +09:00 committed by Cameron Kaiser
parent 8636a96b78
commit 522d06ab34
2 changed files with 69 additions and 0 deletions

View File

@ -14,12 +14,14 @@
#include "jit/InlinableNatives.h"
#include "vm/RegExpStatics.h"
#include "vm/StringBuffer.h"
#include "vm/Unicode.h"
#include "jsobjinlines.h"
#include "vm/NativeObject-inl.h"
using namespace js;
using namespace js::unicode;
using mozilla::ArrayLength;
using mozilla::Maybe;
@ -758,6 +760,29 @@ SetLastIndex(JSContext* cx, Handle<RegExpObject*> reobj, double lastIndex)
return true;
}
/*
 * Reports whether the code unit at |index| of |input| is a trail surrogate
 * whose immediate predecessor is a lead surrogate.
 *
 * CharT selects the character type used to read the string's characters.
 * The caller must pass an index strictly between 0 and input->length(); this
 * is only asserted here, not checked at runtime.
 */
template <typename CharT>
static bool
IsTrailSurrogateWithLeadSurrogateImpl(JSContext* cx, HandleLinearString input, size_t index)
{
    MOZ_ASSERT(index > 0 && index < input->length());

    /* The raw character pointer is obtained under a no-GC guard. */
    JS::AutoCheckCannotGC nogc;
    const CharT* chars = input->chars<CharT>(nogc);

    /* Only look at the preceding unit once the current one is a trail. */
    if (!unicode::IsTrailSurrogate(chars[index]))
        return false;
    return unicode::IsLeadSurrogate(chars[index - 1]);
}
/*
 * Reports whether |index| addresses a trail surrogate in |input| that is
 * directly preceded by a lead surrogate.
 *
 * Returns false for any index that is <= 0 or >= input->length(), so the
 * implementation template is only ever called with an index that has a
 * predecessor and lies inside the string. The character type handed to the
 * template is chosen from input->hasLatin1Chars().
 */
static bool
IsTrailSurrogateWithLeadSurrogate(JSContext* cx, HandleLinearString input, int32_t index)
{
    /* size_t(index) is evaluated only after index is known to be positive. */
    const bool inRange = index > 0 && size_t(index) < input->length();
    if (!inRange)
        return false;

    if (input->hasLatin1Chars())
        return IsTrailSurrogateWithLeadSurrogateImpl<Latin1Char>(cx, input, index);
    return IsTrailSurrogateWithLeadSurrogateImpl<char16_t>(cx, input, index);
}
/* ES6 final draft 21.2.5.2.2. */
RegExpRunStatus
js::ExecuteRegExp(JSContext* cx, HandleObject regexp, HandleString string,
@ -840,6 +865,33 @@ js::ExecuteRegExp(JSContext* cx, HandleObject regexp, HandleString string,
return RegExpRunStatus_Success_NotFound;
}
/* Steps 12-13. */
if (reobj->unicode()) {
/*
* ES6 21.2.2.2 step 2.
* Let listIndex be the index into Input of the character that was
* obtained from element index of str.
*
* In the spec, pattern match is performed with decoded Unicode code
* points, but our implementation performs it with UTF-16 encoded
* string. In step 2, we should decrement searchIndex (index) if it
* points to a trail surrogate that has a corresponding lead surrogate.
*
* var r = /\uD83D\uDC38/ug;
* r.lastIndex = 1;
* var str = "\uD83D\uDC38";
* var result = r.exec(str); // pattern match starts from index 0
* print(result.index); // prints 0
*
* Note: this doesn't match the current spec text and results in
* different values for `result.index` under certain conditions.
* However, the spec will change to match our implementation's
* behavior. See https://github.com/tc39/ecma262/issues/128.
*/
if (IsTrailSurrogateWithLeadSurrogate(cx, input, searchIndex))
searchIndex--;
}
/* Step 14-29. */
RegExpRunStatus status = ExecuteRegExpImpl(cx, res, *re, input, searchIndex, matches);
if (status == RegExpRunStatus_Error)

View File

@ -0,0 +1,17 @@
// Regression test for bug 1135377: with the unicode flag set, a lastIndex
// that lands on the trail half of a surrogate pair must be moved back to the
// lead half before matching starts.
var BUGNUMBER = 1135377;
var summary = "Implement RegExp unicode flag -- Pattern match should start from lead surrogate when lastIndex points corresponding trail surrogate.";

print(BUGNUMBER + ": " + summary);

// U+1F438 written as its UTF-16 surrogate pair; used both as the input
// string and as the expected match text.
var frog = "\uD83D\uDC38";

var re = /\uD83D\uDC38/ug;
re.lastIndex = 1; // index of the trail surrogate
var match = re.exec(frog);

assertEq(match.length, 1);
assertEq(match[0], frog);

// The reported index deliberately differs from the ES6 spec text, which is
// expected to be changed to match this behaviour.
assertEq(match.index, 0);

if (typeof reportCompare === "function") {
    reportCompare(true, true);
}