mirror of
https://github.com/classilla/tenfourfox.git
synced 2025-03-04 10:30:27 +00:00
#393, Bug 1135377 - Part 6: Support ignoreCase for BMP in RegExp with unicode flag. r=till, f=anba
This commit is contained in:
parent
45a4712b08
commit
cf744e9d35
@ -783,9 +783,10 @@ NativeRegExpMacroAssembler::CheckGreedyLoop(Label* on_tos_equals_current_positio
|
||||
}
|
||||
|
||||
void
|
||||
NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, Label* on_no_match)
|
||||
NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, Label* on_no_match,
|
||||
bool unicode)
|
||||
{
|
||||
JitSpew(SPEW_PREFIX "CheckNotBackReference(%d)", start_reg);
|
||||
JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode);
|
||||
|
||||
Label fallthrough;
|
||||
Label success;
|
||||
@ -1034,8 +1035,13 @@ NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label
|
||||
masm.passABIArg(current_character);
|
||||
masm.passABIArg(current_position);
|
||||
masm.passABIArg(temp1);
|
||||
int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings;
|
||||
masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
|
||||
if (!unicode) {
|
||||
int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings;
|
||||
masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
|
||||
} else {
|
||||
int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareUCStrings;
|
||||
masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
|
||||
}
|
||||
masm.storeCallResult(temp0);
|
||||
|
||||
masm.PopRegsInMask(volatileRegs);
|
||||
@ -1047,7 +1053,9 @@ NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label
|
||||
// PowerPC specific version, somewhat more efficient (fixes issue 308)
|
||||
Register ppc0 = (temp1 == r6) ? r7 : r6;
|
||||
Register ppc1 = (temp1 == r8) ? r9 : r8;
|
||||
int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings;
|
||||
int (*fun)(const char16_t*, const char16_t*, size_t) = (unicode)
|
||||
? CaseInsensitiveCompareUCStrings
|
||||
: CaseInsensitiveCompareStrings ;
|
||||
|
||||
// This is lazy, but only incurs one extra x_subi.
|
||||
masm.x_mflr(r0);
|
||||
|
50
js/src/irregexp/NativeRegExpMacroAssembler.cpp.rej
Normal file
50
js/src/irregexp/NativeRegExpMacroAssembler.cpp.rej
Normal file
@ -0,0 +1,50 @@
|
||||
diff a/js/src/irregexp/NativeRegExpMacroAssembler.cpp b/js/src/irregexp/NativeRegExpMacroAssembler.cpp (rejected hunks)
|
||||
@@ -714,19 +714,20 @@ NativeRegExpMacroAssembler::CheckNotBack
|
||||
|
||||
// Restore backtrack stack pointer.
|
||||
masm.pop(backtrack_stack_pointer);
|
||||
|
||||
masm.bind(&fallthrough);
|
||||
}
|
||||
|
||||
void
|
||||
-NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label* on_no_match)
|
||||
+NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label* on_no_match,
|
||||
+ bool unicode)
|
||||
{
|
||||
- JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d)", start_reg);
|
||||
+ JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode);
|
||||
|
||||
Label fallthrough;
|
||||
|
||||
masm.loadPtr(register_location(start_reg), current_character); // Index of start of capture
|
||||
masm.loadPtr(register_location(start_reg + 1), temp1); // Index of end of capture
|
||||
masm.subPtr(current_character, temp1); // Length of capture.
|
||||
|
||||
// The length of a capture should not be negative. This can only happen
|
||||
@@ -828,18 +829,23 @@ NativeRegExpMacroAssembler::CheckNotBack
|
||||
// Parameters are
|
||||
// Address byte_offset1 - Address captured substring's start.
|
||||
// Address byte_offset2 - Address of current character position.
|
||||
// size_t byte_length - length of capture in bytes(!)
|
||||
masm.setupUnalignedABICall(temp0);
|
||||
masm.passABIArg(current_character);
|
||||
masm.passABIArg(current_position);
|
||||
masm.passABIArg(temp1);
|
||||
- int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings;
|
||||
- masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
|
||||
+ if (!unicode) {
|
||||
+ int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings;
|
||||
+ masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
|
||||
+ } else {
|
||||
+ int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareUCStrings;
|
||||
+ masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
|
||||
+ }
|
||||
masm.storeCallResult(temp0);
|
||||
|
||||
masm.PopRegsInMask(volatileRegs);
|
||||
|
||||
// Check if function returned non-zero for success or zero for failure.
|
||||
masm.branchTest32(Assembler::Zero, temp0, temp0, BranchOrBacktrack(on_no_match));
|
||||
|
||||
// On success, increment position by length of capture.
|
@ -104,7 +104,7 @@ class MOZ_STACK_CLASS NativeRegExpMacroAssembler : public RegExpMacroAssembler
|
||||
void CheckGreedyLoop(jit::Label* on_tos_equals_current_position);
|
||||
void CheckNotAtStart(jit::Label* on_not_at_start);
|
||||
void CheckNotBackReference(int start_reg, jit::Label* on_no_match);
|
||||
void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match);
|
||||
void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, bool unicode);
|
||||
void CheckNotCharacter(unsigned c, jit::Label* on_not_equal);
|
||||
void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal);
|
||||
void CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with,
|
||||
|
@ -90,7 +90,8 @@ V(CHECK_AT_START, 43, 8) /* bc8 pad24 addr32 */ \
|
||||
V(CHECK_NOT_AT_START, 44, 8) /* bc8 pad24 addr32 */ \
|
||||
V(CHECK_GREEDY, 45, 8) /* bc8 pad24 addr32 */ \
|
||||
V(ADVANCE_CP_AND_GOTO, 46, 8) /* bc8 offset24 addr32 */ \
|
||||
V(SET_CURRENT_POSITION_FROM_END, 47, 4) /* bc8 idx24 */
|
||||
V(SET_CURRENT_POSITION_FROM_END, 47, 4) /* bc8 idx24 */ \
|
||||
V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 48, 8) /* bc8 reg_idx24 addr32 */
|
||||
|
||||
#define DECLARE_BYTECODES(name, code, length) \
|
||||
static const int BC_##name = code;
|
||||
|
@ -82,11 +82,26 @@ static const int kSpaceAndSurrogateRangeCount = ArrayLength(kSpaceAndSurrogateRa
|
||||
static const int kWordRanges[] = {
|
||||
'0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 };
|
||||
static const int kWordRangeCount = ArrayLength(kWordRanges);
|
||||
// Word-character table that AddClassEscapeUnicode hands to AddClass for \w
// when ignore_case is set. It holds the same entries as kWordRanges plus
// 0x017F and 0x212A (labelled LATIN SMALL LETTER LONG S and KELVIN SIGN in
// FOR_EACH_NON_ASCII_TO_ASCII_FOLDING), and ends with the same 0x10000 entry
// as the sibling tables.
// NOTE(review): entries read like half-open [from, to) pairs followed by a
// terminator -- confirm against AddClass, which is not visible here.
// NOTE(review): sibling counts are named k...RangeCount; this one omits
// "Range".
static const int kIgnoreCaseWordRanges[] = {
|
||||
'0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1,
|
||||
0x017F, 0x017F + 1, 0x212A, 0x212A + 1,
|
||||
0x10000 };
|
||||
static const int kIgnoreCaseWordCount = ArrayLength(kIgnoreCaseWordRanges);
|
||||
static const int kWordAndSurrogateRanges[] = {
|
||||
'0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1,
|
||||
unicode::LeadSurrogateMin, unicode::TrailSurrogateMax + 1,
|
||||
0x10000 };
|
||||
static const int kWordAndSurrogateRangeCount = ArrayLength(kWordAndSurrogateRanges);
|
||||
// Table that AddClassEscapeUnicode hands to AddClass (not AddClassNegated)
// for \W when ignore_case is set, i.e. it is written out already negated.
// Compared with kWordAndSurrogateRanges it lists the gaps around the digit,
// letter, '_' and surrogate ranges, and additionally lists 'K', 'S', 'k' and
// 's' on their own.
// NOTE(review): presumably K/S/k/s appear here because 0x212A and 0x017F are
// paired with 0x006B ('k') and 0x0073 ('s') in
// FOR_EACH_NON_ASCII_TO_ASCII_FOLDING -- confirm the intended semantics.
// NOTE(review): the table ends with two 0x10000 entries (end of the last
// range, then what looks like the terminator used by the sibling tables) --
// confirm against AddClass, which is not visible here.
static const int kNegatedIgnoreCaseWordAndSurrogateRanges[] = {
|
||||
0, '0', '9' + 1, 'A',
|
||||
'K', 'K' + 1, 'S', 'S' + 1,
|
||||
'Z' + 1, '_', '_' + 1, 'a',
|
||||
'k', 'k' + 1, 's', 's' + 1,
|
||||
'z' + 1, unicode::LeadSurrogateMin,
|
||||
unicode::TrailSurrogateMax + 1, 0x10000,
|
||||
0x10000 };
|
||||
static const int kNegatedIgnoreCaseWordAndSurrogateRangeCount =
|
||||
ArrayLength(kNegatedIgnoreCaseWordAndSurrogateRanges);
|
||||
static const int kDigitRanges[] = { '0', '9' + 1, 0x10000 };
|
||||
static const int kDigitRangeCount = ArrayLength(kDigitRanges);
|
||||
static const int kDigitAndSurrogateRanges[] = {
|
||||
@ -186,14 +201,29 @@ CharacterRange::AddClassEscape(LifoAlloc* alloc, char16_t type,
|
||||
// Add class escape, excluding surrogate pair range.
|
||||
void
|
||||
CharacterRange::AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type,
|
||||
CharacterRangeVector* ranges)
|
||||
CharacterRangeVector* ranges, bool ignore_case)
|
||||
{
|
||||
switch (type) {
|
||||
case 's':
|
||||
case 'd':
|
||||
return AddClassEscape(alloc, type, ranges);
|
||||
break;
|
||||
case 'S':
|
||||
AddClassNegated(kSpaceAndSurrogateRanges, kSpaceAndSurrogateRangeCount, ranges);
|
||||
break;
|
||||
case 'w':
|
||||
if (ignore_case)
|
||||
AddClass(kIgnoreCaseWordRanges, kIgnoreCaseWordCount, ranges);
|
||||
else
|
||||
AddClassEscape(alloc, type, ranges);
|
||||
break;
|
||||
case 'W':
|
||||
AddClassNegated(kWordAndSurrogateRanges, kWordAndSurrogateRangeCount, ranges);
|
||||
if (ignore_case) {
|
||||
AddClass(kNegatedIgnoreCaseWordAndSurrogateRanges,
|
||||
kNegatedIgnoreCaseWordAndSurrogateRangeCount, ranges);
|
||||
} else {
|
||||
AddClassNegated(kWordAndSurrogateRanges, kWordAndSurrogateRangeCount, ranges);
|
||||
}
|
||||
break;
|
||||
case 'D':
|
||||
AddClassNegated(kDigitAndSurrogateRanges, kDigitAndSurrogateRangeCount, ranges);
|
||||
@ -203,20 +233,39 @@ CharacterRange::AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type,
|
||||
}
|
||||
}
|
||||
|
||||
// X-macro: expands to macro(C, F) once per entry below, where C is the code
// unit named by the comment above the entry and F is the value paired with
// it. In this file it is expanded once to test whether a range contains any
// C, and once to build a switch that maps C to F.
// Note that three of the F values (0x00FF, 0x00DF, 0x00E5) are above 0x7F,
// so not every target is ASCII despite the macro's name.
#define FOR_EACH_NON_ASCII_TO_ASCII_FOLDING(macro) \
|
||||
/* LATIN CAPITAL LETTER Y WITH DIAERESIS */ \
|
||||
macro(0x0178, 0x00FF) \
|
||||
/* LATIN SMALL LETTER LONG S */ \
|
||||
macro(0x017F, 0x0073) \
|
||||
/* LATIN CAPITAL LETTER SHARP S */ \
|
||||
macro(0x1E9E, 0x00DF) \
|
||||
/* KELVIN SIGN */ \
|
||||
macro(0x212A, 0x006B) \
|
||||
/* ANGSTROM SIGN */ \
|
||||
macro(0x212B, 0x00E5)
|
||||
|
||||
// Returns true if |range| contains one of the characters we need to check
// for: 0x39c, 0x3bc or 0x178. When |unicode| is set, every first argument
// listed in FOR_EACH_NON_ASCII_TO_ASCII_FOLDING is checked as well (0x178
// appears in both lists).
|
||||
static inline bool
|
||||
RangeContainsLatin1Equivalents(CharacterRange range)
|
||||
RangeContainsLatin1Equivalents(CharacterRange range, bool unicode)
|
||||
{
|
||||
// TODO(dcarney): this could be a lot more efficient.
|
||||
/* TODO(dcarney): this could be a lot more efficient. */
|
||||
if (unicode) {
|
||||
#define CHECK_RANGE(C, F) \
|
||||
if (range.Contains(C)) return true;
|
||||
FOR_EACH_NON_ASCII_TO_ASCII_FOLDING(CHECK_RANGE)
|
||||
#undef CHECK_RANGE
|
||||
}
|
||||
|
||||
return range.Contains(0x39c) || range.Contains(0x3bc) || range.Contains(0x178);
|
||||
}
|
||||
|
||||
static bool
|
||||
RangesContainLatin1Equivalents(const CharacterRangeVector& ranges)
|
||||
RangesContainLatin1Equivalents(const CharacterRangeVector& ranges, bool unicode)
|
||||
{
|
||||
for (size_t i = 0; i < ranges.length(); i++) {
|
||||
// TODO(dcarney): this could be a lot more efficient.
|
||||
if (RangeContainsLatin1Equivalents(ranges[i]))
|
||||
if (RangeContainsLatin1Equivalents(ranges[i], unicode))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -229,27 +278,24 @@ static const size_t kEcma262UnCanonicalizeMaxWidth = 4;
|
||||
static int
|
||||
GetCaseIndependentLetters(char16_t character,
|
||||
bool ascii_subject,
|
||||
bool unicode,
|
||||
const char16_t* choices,
|
||||
size_t choices_length,
|
||||
char16_t* letters)
|
||||
{
|
||||
const char16_t choices[] = {
|
||||
character,
|
||||
unicode::ToLowerCase(character),
|
||||
unicode::ToUpperCase(character)
|
||||
};
|
||||
|
||||
size_t count = 0;
|
||||
for (size_t i = 0; i < ArrayLength(choices); i++) {
|
||||
for (size_t i = 0; i < choices_length; i++) {
|
||||
char16_t c = choices[i];
|
||||
|
||||
// The standard requires that non-ASCII characters cannot have ASCII
|
||||
// character codes in their equivalence class, even though this
|
||||
// situation occurs multiple times in the unicode tables.
|
||||
static const unsigned kMaxAsciiCharCode = 127;
|
||||
if (character > kMaxAsciiCharCode && c <= kMaxAsciiCharCode)
|
||||
if (!unicode && character > kMaxAsciiCharCode && c <= kMaxAsciiCharCode)
|
||||
continue;
|
||||
|
||||
// Skip characters that can't appear in one byte strings.
|
||||
if (ascii_subject && c > kMaxOneByteCharCode)
|
||||
if (!unicode && ascii_subject && c > kMaxOneByteCharCode)
|
||||
continue;
|
||||
|
||||
// Watch for duplicates.
|
||||
@ -269,10 +315,45 @@ GetCaseIndependentLetters(char16_t character,
|
||||
return count;
|
||||
}
|
||||
|
||||
// Convenience overload: builds the list of candidate equivalents for
// |character| and forwards it, together with |ascii_subject|, |unicode| and
// |letters|, to the array-taking GetCaseIndependentLetters, returning that
// call's result.
//  - unicode == true:  candidates are the character itself, its FoldCase
//    value and its three ReverseFoldCase values (five entries).
//  - unicode == false: candidates are the character itself and its
//    ToLowerCase / ToUpperCase values (three entries).
// The bool parameter |unicode| does not interfere with the unicode::
// qualified calls below: a name followed by :: is looked up among
// namespaces and types only.
// NOTE(review): callers in this file size |letters| as
// char16_t[kEcma262UnCanonicalizeMaxWidth] while the unicode path supplies
// five candidates; presumably duplicate filtering in the array-taking
// overload keeps the distinct count within that bound -- confirm.
static int
|
||||
GetCaseIndependentLetters(char16_t character,
|
||||
bool ascii_subject,
|
||||
bool unicode,
|
||||
char16_t* letters)
|
||||
{
|
||||
if (unicode) {
|
||||
const char16_t choices[] = {
|
||||
character,
|
||||
unicode::FoldCase(character),
|
||||
unicode::ReverseFoldCase1(character),
|
||||
unicode::ReverseFoldCase2(character),
|
||||
unicode::ReverseFoldCase3(character),
|
||||
};
|
||||
return GetCaseIndependentLetters(character, ascii_subject, unicode,
|
||||
choices, ArrayLength(choices), letters);
|
||||
}
|
||||
|
||||
const char16_t choices[] = {
|
||||
character,
|
||||
unicode::ToLowerCase(character),
|
||||
unicode::ToUpperCase(character)
|
||||
};
|
||||
return GetCaseIndependentLetters(character, ascii_subject, unicode,
|
||||
choices, ArrayLength(choices), letters);
|
||||
}
|
||||
|
||||
static char16_t
|
||||
ConvertNonLatin1ToLatin1(char16_t c)
|
||||
ConvertNonLatin1ToLatin1(char16_t c, bool unicode)
|
||||
{
|
||||
MOZ_ASSERT(c > kMaxOneByteCharCode);
|
||||
if (unicode) {
|
||||
switch (c) {
|
||||
#define CONVERT(C, F) case C: return F;
|
||||
FOR_EACH_NON_ASCII_TO_ASCII_FOLDING(CONVERT)
|
||||
#undef CONVERT
|
||||
}
|
||||
}
|
||||
|
||||
switch (c) {
|
||||
// These are equivalent characters in Unicode.
|
||||
case 0x39c:
|
||||
@ -287,12 +368,12 @@ ConvertNonLatin1ToLatin1(char16_t c)
|
||||
}
|
||||
|
||||
void
|
||||
CharacterRange::AddCaseEquivalents(bool is_ascii, CharacterRangeVector* ranges)
|
||||
CharacterRange::AddCaseEquivalents(bool is_ascii, bool unicode, CharacterRangeVector* ranges)
|
||||
{
|
||||
char16_t bottom = from();
|
||||
char16_t top = to();
|
||||
|
||||
if (is_ascii && !RangeContainsLatin1Equivalents(*this)) {
|
||||
if (is_ascii && !RangeContainsLatin1Equivalents(*this, unicode)) {
|
||||
if (bottom > kMaxOneByteCharCode)
|
||||
return;
|
||||
if (top > kMaxOneByteCharCode)
|
||||
@ -301,7 +382,7 @@ CharacterRange::AddCaseEquivalents(bool is_ascii, CharacterRangeVector* ranges)
|
||||
|
||||
for (char16_t c = bottom;; c++) {
|
||||
char16_t chars[kEcma262UnCanonicalizeMaxWidth];
|
||||
size_t length = GetCaseIndependentLetters(c, is_ascii, chars);
|
||||
size_t length = GetCaseIndependentLetters(c, is_ascii, unicode, chars);
|
||||
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
char16_t other = chars[i];
|
||||
@ -581,7 +662,7 @@ SeqRegExpNode::FillInBMInfo(int offset,
|
||||
}
|
||||
|
||||
RegExpNode*
|
||||
SeqRegExpNode::FilterASCII(int depth, bool ignore_case)
|
||||
SeqRegExpNode::FilterASCII(int depth, bool ignore_case, bool unicode)
|
||||
{
|
||||
if (info()->replacement_calculated)
|
||||
return replacement();
|
||||
@ -591,13 +672,13 @@ SeqRegExpNode::FilterASCII(int depth, bool ignore_case)
|
||||
|
||||
MOZ_ASSERT(!info()->visited);
|
||||
VisitMarker marker(info());
|
||||
return FilterSuccessor(depth - 1, ignore_case);
|
||||
return FilterSuccessor(depth - 1, ignore_case, unicode);
|
||||
}
|
||||
|
||||
RegExpNode*
|
||||
SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case)
|
||||
SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case, bool unicode)
|
||||
{
|
||||
RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case);
|
||||
RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case, unicode);
|
||||
if (next == nullptr)
|
||||
return set_replacement(nullptr);
|
||||
|
||||
@ -740,7 +821,7 @@ TextNode::GreedyLoopTextLength()
|
||||
}
|
||||
|
||||
RegExpNode*
|
||||
TextNode::FilterASCII(int depth, bool ignore_case)
|
||||
TextNode::FilterASCII(int depth, bool ignore_case, bool unicode)
|
||||
{
|
||||
if (info()->replacement_calculated)
|
||||
return replacement();
|
||||
@ -764,7 +845,7 @@ TextNode::FilterASCII(int depth, bool ignore_case)
|
||||
|
||||
// Here, we need to check for characters whose upper and lower cases
|
||||
// are outside the Latin-1 range.
|
||||
char16_t converted = ConvertNonLatin1ToLatin1(c);
|
||||
char16_t converted = ConvertNonLatin1ToLatin1(c, unicode);
|
||||
if (converted == 0) {
|
||||
// Character is outside Latin-1 completely
|
||||
return set_replacement(nullptr);
|
||||
@ -789,7 +870,7 @@ TextNode::FilterASCII(int depth, bool ignore_case)
|
||||
ranges[0].to() >= kMaxOneByteCharCode)
|
||||
{
|
||||
// This will be handled in a later filter.
|
||||
if (ignore_case && RangesContainLatin1Equivalents(ranges))
|
||||
if (ignore_case && RangesContainLatin1Equivalents(ranges, unicode))
|
||||
continue;
|
||||
return set_replacement(nullptr);
|
||||
}
|
||||
@ -798,14 +879,14 @@ TextNode::FilterASCII(int depth, bool ignore_case)
|
||||
ranges[0].from() > kMaxOneByteCharCode)
|
||||
{
|
||||
// This will be handled in a later filter.
|
||||
if (ignore_case && RangesContainLatin1Equivalents(ranges))
|
||||
if (ignore_case && RangesContainLatin1Equivalents(ranges, unicode))
|
||||
continue;
|
||||
return set_replacement(nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return FilterSuccessor(depth - 1, ignore_case);
|
||||
return FilterSuccessor(depth - 1, ignore_case, unicode);
|
||||
}
|
||||
|
||||
void
|
||||
@ -823,7 +904,7 @@ TextNode::CalculateOffsets()
|
||||
}
|
||||
}
|
||||
|
||||
void TextNode::MakeCaseIndependent(bool is_ascii)
|
||||
void TextNode::MakeCaseIndependent(bool is_ascii, bool unicode)
|
||||
{
|
||||
int element_count = elements().length();
|
||||
for (int i = 0; i < element_count; i++) {
|
||||
@ -839,7 +920,7 @@ void TextNode::MakeCaseIndependent(bool is_ascii)
|
||||
CharacterRangeVector& ranges = cc->ranges(alloc());
|
||||
int range_count = ranges.length();
|
||||
for (int j = 0; j < range_count; j++)
|
||||
ranges[j].AddCaseEquivalents(is_ascii, &ranges);
|
||||
ranges[j].AddCaseEquivalents(is_ascii, unicode, &ranges);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -988,7 +1069,7 @@ ChoiceNode::FillInBMInfo(int offset,
|
||||
}
|
||||
|
||||
RegExpNode*
|
||||
ChoiceNode::FilterASCII(int depth, bool ignore_case)
|
||||
ChoiceNode::FilterASCII(int depth, bool ignore_case, bool unicode)
|
||||
{
|
||||
if (info()->replacement_calculated)
|
||||
return replacement();
|
||||
@ -1012,7 +1093,7 @@ ChoiceNode::FilterASCII(int depth, bool ignore_case)
|
||||
for (int i = 0; i < choice_count; i++) {
|
||||
GuardedAlternative alternative = alternatives()[i];
|
||||
RegExpNode* replacement =
|
||||
alternative.node()->FilterASCII(depth - 1, ignore_case);
|
||||
alternative.node()->FilterASCII(depth - 1, ignore_case, unicode);
|
||||
MOZ_ASSERT(replacement != this); // No missing EMPTY_MATCH_CHECK.
|
||||
if (replacement != nullptr) {
|
||||
alternatives()[i].set_node(replacement);
|
||||
@ -1033,7 +1114,7 @@ ChoiceNode::FilterASCII(int depth, bool ignore_case)
|
||||
new_alternatives.reserve(surviving);
|
||||
for (int i = 0; i < choice_count; i++) {
|
||||
RegExpNode* replacement =
|
||||
alternatives()[i].node()->FilterASCII(depth - 1, ignore_case);
|
||||
alternatives()[i].node()->FilterASCII(depth - 1, ignore_case, unicode);
|
||||
if (replacement != nullptr) {
|
||||
alternatives()[i].set_node(replacement);
|
||||
AutoEnterOOMUnsafeRegion oomUnsafe;
|
||||
@ -1090,7 +1171,7 @@ NegativeLookaheadChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
|
||||
}
|
||||
|
||||
RegExpNode*
|
||||
NegativeLookaheadChoiceNode::FilterASCII(int depth, bool ignore_case)
|
||||
NegativeLookaheadChoiceNode::FilterASCII(int depth, bool ignore_case, bool unicode)
|
||||
{
|
||||
if (info()->replacement_calculated)
|
||||
return replacement();
|
||||
@ -1104,14 +1185,14 @@ NegativeLookaheadChoiceNode::FilterASCII(int depth, bool ignore_case)
|
||||
// Alternative 0 is the negative lookahead, alternative 1 is what comes
|
||||
// afterwards.
|
||||
RegExpNode* node = alternatives()[1].node();
|
||||
RegExpNode* replacement = node->FilterASCII(depth - 1, ignore_case);
|
||||
RegExpNode* replacement = node->FilterASCII(depth - 1, ignore_case, unicode);
|
||||
|
||||
if (replacement == nullptr)
|
||||
return set_replacement(nullptr);
|
||||
alternatives()[1].set_node(replacement);
|
||||
|
||||
RegExpNode* neg_node = alternatives()[0].node();
|
||||
RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1, ignore_case);
|
||||
RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1, ignore_case, unicode);
|
||||
|
||||
// If the negative lookahead is always going to fail then
|
||||
// we don't need to check it.
|
||||
@ -1192,7 +1273,7 @@ LoopChoiceNode::FillInBMInfo(int offset,
|
||||
}
|
||||
|
||||
RegExpNode*
|
||||
LoopChoiceNode::FilterASCII(int depth, bool ignore_case)
|
||||
LoopChoiceNode::FilterASCII(int depth, bool ignore_case, bool unicode)
|
||||
{
|
||||
if (info()->replacement_calculated)
|
||||
return replacement();
|
||||
@ -1205,7 +1286,7 @@ LoopChoiceNode::FilterASCII(int depth, bool ignore_case)
|
||||
VisitMarker marker(info());
|
||||
|
||||
RegExpNode* continue_replacement =
|
||||
continue_node_->FilterASCII(depth - 1, ignore_case);
|
||||
continue_node_->FilterASCII(depth - 1, ignore_case, unicode);
|
||||
|
||||
// If we can't continue after the loop then there is no sense in doing the
|
||||
// loop.
|
||||
@ -1213,7 +1294,7 @@ LoopChoiceNode::FilterASCII(int depth, bool ignore_case)
|
||||
return set_replacement(nullptr);
|
||||
}
|
||||
|
||||
return ChoiceNode::FilterASCII(depth - 1, ignore_case);
|
||||
return ChoiceNode::FilterASCII(depth - 1, ignore_case, unicode);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
@ -1242,7 +1323,7 @@ void
|
||||
Analysis::VisitText(TextNode* that)
|
||||
{
|
||||
if (ignore_case_)
|
||||
that->MakeCaseIndependent(is_ascii_);
|
||||
that->MakeCaseIndependent(is_ascii_, unicode_);
|
||||
EnsureAnalyzed(that->on_success());
|
||||
if (!has_failed()) {
|
||||
that->CalculateOffsets();
|
||||
@ -1534,7 +1615,7 @@ class irregexp::RegExpCompiler
|
||||
{
|
||||
public:
|
||||
RegExpCompiler(JSContext* cx, LifoAlloc* alloc, int capture_count,
|
||||
bool ignore_case, bool is_ascii, bool match_only);
|
||||
bool ignore_case, bool is_ascii, bool match_only, bool unicode);
|
||||
|
||||
int AllocateRegister() {
|
||||
if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
|
||||
@ -1571,6 +1652,7 @@ class irregexp::RegExpCompiler
|
||||
|
||||
inline bool ignore_case() { return ignore_case_; }
|
||||
inline bool ascii() { return ascii_; }
|
||||
inline bool unicode() { return unicode_; }
|
||||
FrequencyCollator* frequency_collator() { return &frequency_collator_; }
|
||||
|
||||
int current_expansion_factor() { return current_expansion_factor_; }
|
||||
@ -1592,6 +1674,7 @@ class irregexp::RegExpCompiler
|
||||
bool ignore_case_;
|
||||
bool ascii_;
|
||||
bool match_only_;
|
||||
bool unicode_;
|
||||
bool reg_exp_too_big_;
|
||||
int current_expansion_factor_;
|
||||
FrequencyCollator frequency_collator_;
|
||||
@ -1614,12 +1697,13 @@ class RecursionCheck
|
||||
// Attempts to compile the regexp using an Irregexp code generator. Returns
|
||||
// a fixed array or a null handle depending on whether it succeeded.
|
||||
RegExpCompiler::RegExpCompiler(JSContext* cx, LifoAlloc* alloc, int capture_count,
|
||||
bool ignore_case, bool ascii, bool match_only)
|
||||
bool ignore_case, bool ascii, bool match_only, bool unicode)
|
||||
: next_register_(2 * (capture_count + 1)),
|
||||
recursion_depth_(0),
|
||||
ignore_case_(ignore_case),
|
||||
ascii_(ascii),
|
||||
match_only_(match_only),
|
||||
unicode_(unicode),
|
||||
reg_exp_too_big_(false),
|
||||
current_expansion_factor_(1),
|
||||
frequency_collator_(),
|
||||
@ -1692,7 +1776,8 @@ IsNativeRegExpEnabled(JSContext* cx)
|
||||
RegExpCode
|
||||
irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData* data,
|
||||
HandleLinearString sample, bool is_global, bool ignore_case,
|
||||
bool is_ascii, bool match_only, bool force_bytecode, bool sticky)
|
||||
bool is_ascii, bool match_only, bool force_bytecode, bool sticky,
|
||||
bool unicode)
|
||||
{
|
||||
if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
|
||||
JS_ReportError(cx, "regexp too big");
|
||||
@ -1700,7 +1785,8 @@ irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData*
|
||||
}
|
||||
|
||||
LifoAlloc& alloc = cx->tempLifoAlloc();
|
||||
RegExpCompiler compiler(cx, &alloc, data->capture_count, ignore_case, is_ascii, match_only);
|
||||
RegExpCompiler compiler(cx, &alloc, data->capture_count, ignore_case, is_ascii, match_only,
|
||||
unicode);
|
||||
|
||||
// Sample some characters from the middle of the string.
|
||||
if (sample->hasLatin1Chars()) {
|
||||
@ -1746,18 +1832,18 @@ irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData*
|
||||
}
|
||||
}
|
||||
if (is_ascii) {
|
||||
node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);
|
||||
node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case, unicode);
|
||||
// Do it again to propagate the new nodes to places where they were not
|
||||
// put because they had not been calculated yet.
|
||||
if (node != nullptr) {
|
||||
node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);
|
||||
node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case, unicode);
|
||||
}
|
||||
}
|
||||
|
||||
if (node == nullptr)
|
||||
node = alloc.newInfallible<EndNode>(&alloc, EndNode::BACKTRACK);
|
||||
|
||||
Analysis analysis(cx, ignore_case, is_ascii);
|
||||
Analysis analysis(cx, ignore_case, is_ascii, unicode);
|
||||
analysis.EnsureAnalyzed(node);
|
||||
if (analysis.has_failed()) {
|
||||
JS_ReportError(cx, analysis.errorMessage());
|
||||
@ -3597,7 +3683,7 @@ EmitAtomNonLetter(RegExpCompiler* compiler,
|
||||
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
||||
bool ascii = compiler->ascii();
|
||||
char16_t chars[kEcma262UnCanonicalizeMaxWidth];
|
||||
int length = GetCaseIndependentLetters(c, ascii, chars);
|
||||
int length = GetCaseIndependentLetters(c, ascii, compiler->unicode(), chars);
|
||||
if (length < 1) {
|
||||
// This can't match. Must be an ASCII subject and a non-ASCII character.
|
||||
// We do not need to do anything since the ASCII pass already handled this.
|
||||
@ -3673,7 +3759,7 @@ EmitAtomLetter(RegExpCompiler* compiler,
|
||||
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
||||
bool ascii = compiler->ascii();
|
||||
char16_t chars[kEcma262UnCanonicalizeMaxWidth];
|
||||
int length = GetCaseIndependentLetters(c, ascii, chars);
|
||||
int length = GetCaseIndependentLetters(c, ascii, compiler->unicode(), chars);
|
||||
if (length <= 1) return false;
|
||||
// We may not need to check against the end of the input string
|
||||
// if this character lies before a character that matched.
|
||||
@ -4538,7 +4624,8 @@ BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace)
|
||||
MOZ_ASSERT(start_reg_ + 1 == end_reg_);
|
||||
if (compiler->ignore_case()) {
|
||||
assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
|
||||
trace->backtrack());
|
||||
trace->backtrack(),
|
||||
compiler->unicode());
|
||||
} else {
|
||||
assembler->CheckNotBackReference(start_reg_, trace->backtrack());
|
||||
}
|
||||
@ -4684,6 +4771,7 @@ TextNode::FillInBMInfo(int initial_offset,
|
||||
char16_t chars[kEcma262UnCanonicalizeMaxWidth];
|
||||
int length = GetCaseIndependentLetters(character,
|
||||
bm->max_char() == kMaxOneByteCharCode,
|
||||
bm->compiler()->unicode(),
|
||||
chars);
|
||||
for (int j = 0; j < length; j++)
|
||||
bm->Set(offset, chars[j]);
|
||||
@ -4775,7 +4863,8 @@ TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
|
||||
}
|
||||
if (compiler->ignore_case()) {
|
||||
char16_t chars[kEcma262UnCanonicalizeMaxWidth];
|
||||
size_t length = GetCaseIndependentLetters(c, compiler->ascii(), chars);
|
||||
size_t length = GetCaseIndependentLetters(c, compiler->ascii(),
|
||||
compiler->unicode(), chars);
|
||||
MOZ_ASSERT(length != 0); // Can only happen if c > char_mask (see above).
|
||||
if (length == 1) {
|
||||
// This letter has no case equivalents, so it's nice and simple
|
||||
|
@ -88,7 +88,8 @@ struct RegExpCode
|
||||
RegExpCode
|
||||
CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData* data,
|
||||
HandleLinearString sample, bool is_global, bool ignore_case,
|
||||
bool is_ascii, bool match_only, bool force_bytecode, bool sticky);
|
||||
bool is_ascii, bool match_only, bool force_bytecode, bool sticky,
|
||||
bool unicode);
|
||||
|
||||
// Note: this may return RegExpRunStatus_Error if an interrupt was requested
|
||||
// while the code was executing.
|
||||
@ -145,7 +146,7 @@ class CharacterRange
|
||||
|
||||
static void AddClassEscape(LifoAlloc* alloc, char16_t type, CharacterRangeVector* ranges);
|
||||
static void AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type,
|
||||
CharacterRangeVector* ranges);
|
||||
CharacterRangeVector* ranges, bool ignoreCase);
|
||||
|
||||
static inline CharacterRange Singleton(char16_t value) {
|
||||
return CharacterRange(value, value);
|
||||
@ -165,7 +166,7 @@ class CharacterRange
|
||||
bool is_valid() { return from_ <= to_; }
|
||||
bool IsEverything(char16_t max) { return from_ == 0 && to_ >= max; }
|
||||
bool IsSingleton() { return (from_ == to_); }
|
||||
void AddCaseEquivalents(bool is_ascii, CharacterRangeVector* ranges);
|
||||
void AddCaseEquivalents(bool is_ascii, bool unicode, CharacterRangeVector* ranges);
|
||||
|
||||
static void Split(const LifoAlloc* alloc,
|
||||
CharacterRangeVector base,
|
||||
@ -518,7 +519,7 @@ class RegExpNode
|
||||
// If we know that the input is ASCII then there are some nodes that can
|
||||
// never match. This method returns a node that can be substituted for
|
||||
// itself, or nullptr if the node can never match.
|
||||
virtual RegExpNode* FilterASCII(int depth, bool ignore_case) { return this; }
|
||||
virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode) { return this; }
|
||||
|
||||
// Helper for FilterASCII.
|
||||
RegExpNode* replacement() {
|
||||
@ -625,14 +626,14 @@ class SeqRegExpNode : public RegExpNode
|
||||
|
||||
RegExpNode* on_success() { return on_success_; }
|
||||
void set_on_success(RegExpNode* node) { on_success_ = node; }
|
||||
virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
|
||||
virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode);
|
||||
virtual bool FillInBMInfo(int offset,
|
||||
int budget,
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start);
|
||||
|
||||
protected:
|
||||
RegExpNode* FilterSuccessor(int depth, bool ignore_case);
|
||||
RegExpNode* FilterSuccessor(int depth, bool ignore_case, bool unicode);
|
||||
|
||||
private:
|
||||
RegExpNode* on_success_;
|
||||
@ -750,7 +751,7 @@ class TextNode : public SeqRegExpNode
|
||||
int characters_filled_in,
|
||||
bool not_at_start);
|
||||
TextElementVector& elements() { return *elements_; }
|
||||
void MakeCaseIndependent(bool is_ascii);
|
||||
void MakeCaseIndependent(bool is_ascii, bool unicode);
|
||||
virtual int GreedyLoopTextLength();
|
||||
virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
|
||||
RegExpCompiler* compiler);
|
||||
@ -759,7 +760,7 @@ class TextNode : public SeqRegExpNode
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start);
|
||||
void CalculateOffsets();
|
||||
virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
|
||||
virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode);
|
||||
|
||||
private:
|
||||
enum TextEmitPassType {
|
||||
@ -1013,7 +1014,7 @@ class ChoiceNode : public RegExpNode
|
||||
void set_not_at_start() { not_at_start_ = true; }
|
||||
void set_being_calculated(bool b) { being_calculated_ = b; }
|
||||
virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
|
||||
virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
|
||||
virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode);
|
||||
|
||||
protected:
|
||||
int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
|
||||
@ -1066,7 +1067,7 @@ class NegativeLookaheadChoiceNode : public ChoiceNode
|
||||
// characters, but on a negative lookahead the negative branch did not take
|
||||
// part in that calculation (EatsAtLeast) so the assumptions don't hold.
|
||||
virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; }
|
||||
virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
|
||||
virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode);
|
||||
};
|
||||
|
||||
class LoopChoiceNode : public ChoiceNode
|
||||
@ -1095,7 +1096,7 @@ class LoopChoiceNode : public ChoiceNode
|
||||
RegExpNode* continue_node() { return continue_node_; }
|
||||
bool body_can_be_zero_length() { return body_can_be_zero_length_; }
|
||||
virtual void Accept(NodeVisitor* visitor);
|
||||
virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
|
||||
virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode);
|
||||
|
||||
private:
|
||||
// AddAlternative is made private for loop nodes because alternatives
|
||||
@ -1466,10 +1467,11 @@ class NodeVisitor
|
||||
class Analysis : public NodeVisitor
|
||||
{
|
||||
public:
|
||||
Analysis(JSContext* cx, bool ignore_case, bool is_ascii)
|
||||
Analysis(JSContext* cx, bool ignore_case, bool is_ascii, bool unicode)
|
||||
: cx(cx),
|
||||
ignore_case_(ignore_case),
|
||||
is_ascii_(is_ascii),
|
||||
unicode_(unicode),
|
||||
error_message_(nullptr)
|
||||
{}
|
||||
|
||||
@ -1494,6 +1496,7 @@ class Analysis : public NodeVisitor
|
||||
JSContext* cx;
|
||||
bool ignore_case_;
|
||||
bool is_ascii_;
|
||||
bool unicode_;
|
||||
const char* error_message_;
|
||||
|
||||
Analysis(Analysis&) = delete;
|
||||
|
@ -442,6 +442,27 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
|
||||
}
|
||||
break;
|
||||
}
|
||||
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) {
|
||||
int from = registers[insn >> BYTECODE_SHIFT];
|
||||
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
|
||||
if (from < 0 || len <= 0) {
|
||||
pc += BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_LENGTH;
|
||||
break;
|
||||
}
|
||||
if (current + len > length) {
|
||||
pc = byteCode + Load32Aligned(pc + 4);
|
||||
break;
|
||||
}
|
||||
if (CaseInsensitiveCompareUCStrings(chars + from, chars + current,
|
||||
len * sizeof(CharT)))
|
||||
{
|
||||
current += len;
|
||||
pc += BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_LENGTH;
|
||||
} else {
|
||||
pc = byteCode + Load32Aligned(pc + 4);
|
||||
}
|
||||
break;
|
||||
}
|
||||
BYTECODE(CHECK_AT_START)
|
||||
if (current == 0)
|
||||
pc = byteCode + Load32Aligned(pc + 4);
|
||||
|
@ -65,6 +65,38 @@ template int
|
||||
irregexp::CaseInsensitiveCompareStrings(const char16_t* substring1, const char16_t* substring2,
|
||||
size_t byteLength);
|
||||
|
||||
template <typename CharT>
|
||||
int
|
||||
irregexp::CaseInsensitiveCompareUCStrings(const CharT* substring1, const CharT* substring2,
|
||||
size_t byteLength)
|
||||
{
|
||||
MOZ_ASSERT(byteLength % sizeof(CharT) == 0);
|
||||
size_t length = byteLength / sizeof(CharT);
|
||||
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
char16_t c1 = substring1[i];
|
||||
char16_t c2 = substring2[i];
|
||||
if (c1 != c2) {
|
||||
c1 = unicode::FoldCase(c1);
|
||||
c2 = unicode::FoldCase(c2);
|
||||
if (c1 != c2)
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
template int
|
||||
irregexp::CaseInsensitiveCompareUCStrings(const Latin1Char* substring1,
|
||||
const Latin1Char* substring2,
|
||||
size_t byteLength);
|
||||
|
||||
template int
|
||||
irregexp::CaseInsensitiveCompareUCStrings(const char16_t* substring1,
|
||||
const char16_t* substring2,
|
||||
size_t byteLength);
|
||||
|
||||
InterpretedRegExpMacroAssembler::InterpretedRegExpMacroAssembler(LifoAlloc* alloc, RegExpShared* shared,
|
||||
size_t numSavedRegisters)
|
||||
: RegExpMacroAssembler(*alloc, shared, numSavedRegisters),
|
||||
@ -210,11 +242,16 @@ InterpretedRegExpMacroAssembler::CheckNotBackReference(int start_reg, jit::Label
|
||||
}
|
||||
|
||||
void
|
||||
InterpretedRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match)
|
||||
InterpretedRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg,
|
||||
jit::Label* on_no_match,
|
||||
bool unicode)
|
||||
{
|
||||
MOZ_ASSERT(start_reg >= 0);
|
||||
MOZ_ASSERT(start_reg <= kMaxRegister);
|
||||
Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg);
|
||||
if (unicode)
|
||||
Emit(BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE, start_reg);
|
||||
else
|
||||
Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg);
|
||||
EmitOrLink(on_no_match);
|
||||
}
|
||||
|
||||
|
@ -112,7 +112,8 @@ class MOZ_STACK_CLASS RegExpMacroAssembler
|
||||
virtual void CheckGreedyLoop(jit::Label* on_tos_equals_current_position) = 0;
|
||||
virtual void CheckNotAtStart(jit::Label* on_not_at_start) = 0;
|
||||
virtual void CheckNotBackReference(int start_reg, jit::Label* on_no_match) = 0;
|
||||
virtual void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match) = 0;
|
||||
virtual void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match,
|
||||
bool unicode) = 0;
|
||||
|
||||
// Check the current character for a match with a literal character. If we
|
||||
// fail to match then goto the on_failure label. End of input always
|
||||
@ -221,6 +222,11 @@ template <typename CharT>
|
||||
int
|
||||
CaseInsensitiveCompareStrings(const CharT* substring1, const CharT* substring2, size_t byteLength);
|
||||
|
||||
template <typename CharT>
|
||||
int
|
||||
CaseInsensitiveCompareUCStrings(const CharT* substring1, const CharT* substring2,
|
||||
size_t byteLength);
|
||||
|
||||
class MOZ_STACK_CLASS InterpretedRegExpMacroAssembler : public RegExpMacroAssembler
|
||||
{
|
||||
public:
|
||||
@ -241,7 +247,7 @@ class MOZ_STACK_CLASS InterpretedRegExpMacroAssembler : public RegExpMacroAssemb
|
||||
void CheckGreedyLoop(jit::Label* on_tos_equals_current_position);
|
||||
void CheckNotAtStart(jit::Label* on_not_at_start);
|
||||
void CheckNotBackReference(int start_reg, jit::Label* on_no_match);
|
||||
void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match);
|
||||
void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, bool unicode);
|
||||
void CheckNotCharacter(unsigned c, jit::Label* on_not_equal);
|
||||
void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal);
|
||||
void CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with,
|
||||
|
@ -206,7 +206,7 @@ RegExpBuilder::AddQuantifierToAtom(int min, int max,
|
||||
template <typename CharT>
|
||||
RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
|
||||
const CharT* chars, const CharT* end, bool multiline_mode,
|
||||
bool unicode)
|
||||
bool unicode, bool ignore_case)
|
||||
: ts(ts),
|
||||
alloc(alloc),
|
||||
captures_(nullptr),
|
||||
@ -217,6 +217,7 @@ RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
|
||||
has_more_(true),
|
||||
multiline_(multiline_mode),
|
||||
unicode_(unicode),
|
||||
ignore_case_(ignore_case),
|
||||
simple_(false),
|
||||
contains_anchor_(false),
|
||||
is_scanned_for_captures_(false)
|
||||
@ -609,10 +610,11 @@ AddCharOrEscapeUnicode(LifoAlloc* alloc,
|
||||
CharacterRangeVector* trail_ranges,
|
||||
WideCharRangeVector* wide_ranges,
|
||||
char16_t char_class,
|
||||
widechar c)
|
||||
widechar c,
|
||||
bool ignore_case)
|
||||
{
|
||||
if (char_class != kNoCharClass) {
|
||||
CharacterRange::AddClassEscapeUnicode(alloc, char_class, ranges);
|
||||
CharacterRange::AddClassEscapeUnicode(alloc, char_class, ranges, ignore_case);
|
||||
switch (char_class) {
|
||||
case 'S':
|
||||
case 'W':
|
||||
@ -896,7 +898,7 @@ RegExpParser<CharT>::ParseCharacterClass()
|
||||
} else if (current() == ']') {
|
||||
if (unicode_) {
|
||||
AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges,
|
||||
char_class, first);
|
||||
char_class, first, ignore_case_);
|
||||
} else {
|
||||
AddCharOrEscape(alloc, ranges, char_class, first);
|
||||
}
|
||||
@ -926,7 +928,7 @@ RegExpParser<CharT>::ParseCharacterClass()
|
||||
} else {
|
||||
if (unicode_) {
|
||||
AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges,
|
||||
char_class, first);
|
||||
char_class, first, ignore_case_);
|
||||
} else {
|
||||
AddCharOrEscape(alloc, ranges, char_class, first);
|
||||
}
|
||||
@ -1228,13 +1230,14 @@ UnicodeEverythingAtom(LifoAlloc* alloc)
|
||||
}
|
||||
|
||||
// Builds the atom for a character class escape appearing outside a bracketed
// class in a unicode pattern (the visible caller passes 'D', 'S' or 'W').
//
// Four fresh range vectors are allocated from |alloc| and filled by
// AddCharOrEscapeUnicode, which receives |ignore_case| so the ranges can
// account for the ignoreCase flag. The character argument is passed as 0;
// presumably it is unused when a class escape is supplied -- TODO confirm
// against AddCharOrEscapeUnicode.
RegExpTree*
UnicodeCharacterClassEscapeAtom(LifoAlloc* alloc, char16_t char_class, bool ignore_case)
{
    CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
    CharacterRangeVector* lead_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
    CharacterRangeVector* trail_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
    WideCharRangeVector* wide_ranges = alloc->newInfallible<WideCharRangeVector>(*alloc);
    AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, char_class, 0,
                           ignore_case);

    return UnicodeRangesAtom(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, false);
}
|
||||
@ -1406,7 +1409,8 @@ RegExpParser<CharT>::ParseDisjunction()
|
||||
case 'D': case 'S': case 'W':
|
||||
if (unicode_) {
|
||||
Advance();
|
||||
builder->AddAtom(UnicodeCharacterClassEscapeAtom(alloc, current()));
|
||||
builder->AddAtom(UnicodeCharacterClassEscapeAtom(alloc, current(),
|
||||
ignore_case_));
|
||||
Advance();
|
||||
break;
|
||||
}
|
||||
@ -1416,7 +1420,10 @@ RegExpParser<CharT>::ParseDisjunction()
|
||||
Advance(2);
|
||||
CharacterRangeVector* ranges =
|
||||
alloc->newInfallible<CharacterRangeVector>(*alloc);
|
||||
CharacterRange::AddClassEscape(alloc, c, ranges);
|
||||
if (unicode_)
|
||||
CharacterRange::AddClassEscapeUnicode(alloc, c, ranges, ignore_case_);
|
||||
else
|
||||
CharacterRange::AddClassEscape(alloc, c, ranges);
|
||||
RegExpTree* atom = alloc->newInfallible<RegExpCharacterClass>(ranges, false);
|
||||
builder->AddAtom(atom);
|
||||
break;
|
||||
@ -1628,7 +1635,8 @@ template class irregexp::RegExpParser<char16_t>;
|
||||
template <typename CharT>
|
||||
static bool
|
||||
ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length,
|
||||
bool multiline, bool match_only, bool unicode, RegExpCompileData* data)
|
||||
bool multiline, bool match_only, bool unicode, bool ignore_case,
|
||||
RegExpCompileData* data)
|
||||
{
|
||||
if (match_only) {
|
||||
// Try to strip a leading '.*' from the RegExp, but only if it is not
|
||||
@ -1651,7 +1659,7 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si
|
||||
}
|
||||
}
|
||||
|
||||
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, multiline, unicode);
|
||||
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, multiline, unicode, ignore_case);
|
||||
data->tree = parser.ParsePattern();
|
||||
if (!data->tree)
|
||||
return false;
|
||||
@ -1664,15 +1672,15 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si
|
||||
|
||||
bool
|
||||
irregexp::ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
|
||||
bool multiline, bool match_only, bool unicode,
|
||||
bool multiline, bool match_only, bool unicode, bool ignore_case,
|
||||
RegExpCompileData* data)
|
||||
{
|
||||
JS::AutoCheckCannotGC nogc;
|
||||
return str->hasLatin1Chars()
|
||||
? ::ParsePattern(ts, alloc, str->latin1Chars(nogc), str->length(),
|
||||
multiline, match_only, unicode, data)
|
||||
multiline, match_only, unicode, ignore_case, data)
|
||||
: ::ParsePattern(ts, alloc, str->twoByteChars(nogc), str->length(),
|
||||
multiline, match_only, unicode, data);
|
||||
multiline, match_only, unicode, ignore_case, data);
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
@ -1682,7 +1690,7 @@ ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* cha
|
||||
{
|
||||
LifoAllocScope scope(&alloc);
|
||||
|
||||
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, false, unicode);
|
||||
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, false, unicode, false);
|
||||
return parser.ParsePattern() != nullptr;
|
||||
}
|
||||
|
||||
|
@ -43,7 +43,7 @@ namespace irregexp {
|
||||
|
||||
bool
|
||||
ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
|
||||
bool multiline, bool match_only, bool unicode,
|
||||
bool multiline, bool match_only, bool unicode, bool ignore_case,
|
||||
RegExpCompileData* data);
|
||||
|
||||
bool
|
||||
@ -175,7 +175,8 @@ class RegExpParser
|
||||
{
|
||||
public:
|
||||
RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
|
||||
const CharT* chars, const CharT* end, bool multiline_mode, bool unicode);
|
||||
const CharT* chars, const CharT* end, bool multiline_mode, bool unicode,
|
||||
bool ignore_case);
|
||||
|
||||
RegExpTree* ParsePattern();
|
||||
RegExpTree* ParseDisjunction();
|
||||
@ -296,6 +297,7 @@ class RegExpParser
|
||||
bool has_more_;
|
||||
bool multiline_;
|
||||
bool unicode_;
|
||||
bool ignore_case_;
|
||||
bool simple_;
|
||||
bool contains_anchor_;
|
||||
bool is_scanned_for_captures_;
|
||||
|
45
js/src/tests/ecma_6/RegExp/unicode-ignoreCase-ascii.js
Normal file
45
js/src/tests/ecma_6/RegExp/unicode-ignoreCase-ascii.js
Normal file
@ -0,0 +1,45 @@
|
||||
var BUGNUMBER = 1135377;
var summary = "Implement RegExp unicode flag -- ignoreCase flag with non-ascii to ascii map.";

print(BUGNUMBER + ": " + summary);

// Each entry is [pattern, input]. With the /iu flags the pattern is expected
// to match the one-character input, so exec() must return [input].
var foldingCases = [
    // LATIN CAPITAL LETTER Y WITH DIAERESIS
    [/\u0178/iu, "\u00FF"],
    [/\u00FF/iu, "\u0178"],

    // LATIN SMALL LETTER LONG S
    [/\u017F/iu, "S"],
    [/\u017F/iu, "s"],
    [/S/iu, "\u017F"],
    [/s/iu, "\u017F"],

    // LATIN CAPITAL LETTER SHARP S
    [/\u1E9E/iu, "\u00DF"],
    [/\u00DF/iu, "\u1E9E"],

    // KELVIN SIGN
    [/\u212A/iu, "K"],
    [/\u212A/iu, "k"],
    [/K/iu, "\u212A"],
    [/k/iu, "\u212A"],

    // ANGSTROM SIGN
    [/\u212B/iu, "\u00E5"],
    [/\u00E5/iu, "\u212B"],
];

foldingCases.forEach(function (entry) {
    var pattern = entry[0];
    var input = entry[1];
    assertEqArray(pattern.exec(input), [input]);
});

if (typeof reportCompare === "function")
    reportCompare(true, true);
|
39
js/src/tests/ecma_6/RegExp/unicode-ignoreCase-escape.js
Normal file
39
js/src/tests/ecma_6/RegExp/unicode-ignoreCase-escape.js
Normal file
@ -0,0 +1,39 @@
|
||||
var BUGNUMBER = 1135377;
var summary = "Implement RegExp unicode flag -- ignoreCase flag with character class escape.";

print(BUGNUMBER + ": " + summary);

// For each non-ASCII character below, the assertions pin down that the
// upper-case ASCII letter, the lower-case ASCII letter and the non-ASCII
// character itself are all matched by both \w and \W under the /iu flags.

// LATIN SMALL LETTER LONG S

assertEqArray(/\w/iu.exec("S"),
              ["S"]);
assertEqArray(/\w/iu.exec("s"),
              ["s"]);
assertEqArray(/\w/iu.exec("\u017F"),
              ["\u017F"]);

assertEqArray(/\W/iu.exec("S"),
              ["S"]);
assertEqArray(/\W/iu.exec("s"),
              ["s"]);
assertEqArray(/\W/iu.exec("\u017F"),
              ["\u017F"]);

// KELVIN SIGN
// The upper-case "K" is covered explicitly, mirroring the "S"/"s" pair above.

assertEqArray(/\w/iu.exec("K"),
              ["K"]);
assertEqArray(/\w/iu.exec("k"),
              ["k"]);
assertEqArray(/\w/iu.exec("\u212A"),
              ["\u212A"]);

assertEqArray(/\W/iu.exec("K"),
              ["K"]);
assertEqArray(/\W/iu.exec("k"),
              ["k"]);
assertEqArray(/\W/iu.exec("\u212A"),
              ["\u212A"]);

if (typeof reportCompare === "function")
    reportCompare(true, true);
|
19
js/src/tests/ecma_6/RegExp/unicode-ignoreCase-negated.js
Normal file
19
js/src/tests/ecma_6/RegExp/unicode-ignoreCase-negated.js
Normal file
@ -0,0 +1,19 @@
|
||||
var BUGNUMBER = 1135377;
var summary = "Implement RegExp unicode flag -- ignoreCase flag with negated character class.";

print(BUGNUMBER + ": " + summary);

// A negated class naming either case of the letter must reject both cases of
// that letter when the /iu flags are set.
var negatedPatterns = [/[^A]/iu, /[^a]/iu];
["A", "a"].forEach(function (input) {
    negatedPatterns.forEach(function (pattern) {
        assertEq(pattern.exec(input), null);
    });
});

// An unrelated letter is still matched by the negated class.
assertEqArray(/[^A]/iu.exec("b"),
              ["b"]);

if (typeof reportCompare === "function")
    reportCompare(true, true);
|
2245
js/src/tests/ecma_6/RegExp/unicode-ignoreCase.js
Normal file
2245
js/src/tests/ecma_6/RegExp/unicode-ignoreCase.js
Normal file
File diff suppressed because it is too large
Load Diff
1414
js/src/vm/CaseFolding.txt
Normal file
1414
js/src/vm/CaseFolding.txt
Normal file
File diff suppressed because it is too large
Load Diff
@ -519,7 +519,7 @@ RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString inpu
|
||||
/* Parse the pattern. */
|
||||
irregexp::RegExpCompileData data;
|
||||
if (!irregexp::ParsePattern(dummyTokenStream, cx->tempLifoAlloc(), pattern,
|
||||
multiline(), mode == MatchOnly, unicode(), &data))
|
||||
multiline(), mode == MatchOnly, unicode(), ignoreCase(), &data))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
@ -532,7 +532,7 @@ RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString inpu
|
||||
input->hasLatin1Chars(),
|
||||
mode == MatchOnly,
|
||||
force == ForceByteCode,
|
||||
sticky());
|
||||
sticky(), unicode());
|
||||
if (code.empty())
|
||||
return false;
|
||||
|
||||
|
@ -772,4 +772,439 @@ const uint8_t unicode::index2[] = {
|
||||
5, 5, 5, 0, 0, 0,
|
||||
};
|
||||
|
||||
const FoldingInfo unicode::js_foldinfo[] = {
|
||||
{0, 0, 0, 0},
|
||||
{32, 0, 0, 0},
|
||||
{32, 8415, 0, 0},
|
||||
{32, 300, 0, 0},
|
||||
{0, 65504, 0, 0},
|
||||
{0, 65504, 8383, 0},
|
||||
{0, 65504, 268, 0},
|
||||
{775, 743, 0, 0},
|
||||
{32, 8294, 0, 0},
|
||||
{0, 7615, 0, 0},
|
||||
{0, 65504, 8262, 0},
|
||||
{0, 121, 0, 0},
|
||||
{1, 0, 0, 0},
|
||||
{0, 65535, 0, 0},
|
||||
{65415, 0, 0, 0},
|
||||
{65268, 65236, 0, 0},
|
||||
{0, 195, 0, 0},
|
||||
{210, 0, 0, 0},
|
||||
{206, 0, 0, 0},
|
||||
{205, 0, 0, 0},
|
||||
{79, 0, 0, 0},
|
||||
{202, 0, 0, 0},
|
||||
{203, 0, 0, 0},
|
||||
{207, 0, 0, 0},
|
||||
{0, 97, 0, 0},
|
||||
{211, 0, 0, 0},
|
||||
{209, 0, 0, 0},
|
||||
{0, 163, 0, 0},
|
||||
{213, 0, 0, 0},
|
||||
{0, 130, 0, 0},
|
||||
{214, 0, 0, 0},
|
||||
{218, 0, 0, 0},
|
||||
{217, 0, 0, 0},
|
||||
{219, 0, 0, 0},
|
||||
{0, 56, 0, 0},
|
||||
{2, 1, 0, 0},
|
||||
{1, 65535, 0, 0},
|
||||
{0, 65534, 65535, 0},
|
||||
{0, 65457, 0, 0},
|
||||
{65439, 0, 0, 0},
|
||||
{65480, 0, 0, 0},
|
||||
{65406, 0, 0, 0},
|
||||
{10795, 0, 0, 0},
|
||||
{65373, 0, 0, 0},
|
||||
{10792, 0, 0, 0},
|
||||
{0, 10815, 0, 0},
|
||||
{65341, 0, 0, 0},
|
||||
{69, 0, 0, 0},
|
||||
{71, 0, 0, 0},
|
||||
{0, 10783, 0, 0},
|
||||
{0, 10780, 0, 0},
|
||||
{0, 10782, 0, 0},
|
||||
{0, 65326, 0, 0},
|
||||
{0, 65330, 0, 0},
|
||||
{0, 65331, 0, 0},
|
||||
{0, 65334, 0, 0},
|
||||
{0, 65333, 0, 0},
|
||||
{0, 42319, 0, 0},
|
||||
{0, 42315, 0, 0},
|
||||
{0, 65329, 0, 0},
|
||||
{0, 42280, 0, 0},
|
||||
{0, 42308, 0, 0},
|
||||
{0, 65327, 0, 0},
|
||||
{0, 65325, 0, 0},
|
||||
{0, 10743, 0, 0},
|
||||
{0, 42305, 0, 0},
|
||||
{0, 10749, 0, 0},
|
||||
{0, 65323, 0, 0},
|
||||
{0, 65322, 0, 0},
|
||||
{0, 10727, 0, 0},
|
||||
{0, 65318, 0, 0},
|
||||
{0, 42282, 0, 0},
|
||||
{0, 65467, 0, 0},
|
||||
{0, 65319, 0, 0},
|
||||
{0, 65465, 0, 0},
|
||||
{0, 65317, 0, 0},
|
||||
{0, 42261, 0, 0},
|
||||
{0, 42258, 0, 0},
|
||||
{116, 84, 7289, 0},
|
||||
{116, 0, 0, 0},
|
||||
{38, 0, 0, 0},
|
||||
{37, 0, 0, 0},
|
||||
{64, 0, 0, 0},
|
||||
{63, 0, 0, 0},
|
||||
{32, 62, 0, 0},
|
||||
{32, 96, 0, 0},
|
||||
{32, 57, 92, 0},
|
||||
{32, 65452, 7205, 0},
|
||||
{32, 86, 0, 0},
|
||||
{32, 64793, 0, 0},
|
||||
{32, 54, 0, 0},
|
||||
{32, 80, 0, 0},
|
||||
{32, 31, 0, 0},
|
||||
{32, 47, 0, 0},
|
||||
{32, 7549, 0, 0},
|
||||
{0, 65498, 0, 0},
|
||||
{0, 65499, 0, 0},
|
||||
{0, 65504, 30, 0},
|
||||
{0, 65504, 64, 0},
|
||||
{0, 65504, 25, 60},
|
||||
{0, 65420, 65504, 7173},
|
||||
{0, 65504, 54, 0},
|
||||
{0, 64761, 65504, 0},
|
||||
{0, 65504, 22, 0},
|
||||
{0, 65504, 48, 0},
|
||||
{1, 65505, 0, 0},
|
||||
{0, 65504, 65535, 0},
|
||||
{0, 65504, 15, 0},
|
||||
{0, 65504, 7517, 0},
|
||||
{0, 65472, 0, 0},
|
||||
{0, 65473, 0, 0},
|
||||
{8, 0, 0, 0},
|
||||
{65506, 65474, 0, 0},
|
||||
{65511, 65479, 35, 0},
|
||||
{65521, 65489, 0, 0},
|
||||
{65514, 65482, 0, 0},
|
||||
{0, 65528, 0, 0},
|
||||
{65482, 65450, 0, 0},
|
||||
{65488, 65456, 0, 0},
|
||||
{0, 7, 0, 0},
|
||||
{0, 65420, 0, 0},
|
||||
{65476, 65444, 65501, 0},
|
||||
{65472, 65440, 0, 0},
|
||||
{65529, 0, 0, 0},
|
||||
{80, 0, 0, 0},
|
||||
{0, 65456, 0, 0},
|
||||
{15, 0, 0, 0},
|
||||
{0, 65521, 0, 0},
|
||||
{48, 0, 0, 0},
|
||||
{0, 65488, 0, 0},
|
||||
{7264, 0, 0, 0},
|
||||
{0, 38864, 0, 0},
|
||||
{0, 8, 0, 0},
|
||||
{65528, 0, 0, 0},
|
||||
{0, 35332, 0, 0},
|
||||
{0, 3814, 0, 0},
|
||||
{1, 59, 0, 0},
|
||||
{0, 65535, 58, 0},
|
||||
{65478, 65477, 0, 0},
|
||||
{57921, 0, 0, 0},
|
||||
{0, 74, 0, 0},
|
||||
{0, 86, 0, 0},
|
||||
{0, 100, 0, 0},
|
||||
{0, 128, 0, 0},
|
||||
{0, 112, 0, 0},
|
||||
{0, 126, 0, 0},
|
||||
{0, 9, 0, 0},
|
||||
{65462, 0, 0, 0},
|
||||
{65527, 0, 0, 0},
|
||||
{58363, 58247, 58331, 0},
|
||||
{65450, 0, 0, 0},
|
||||
{65436, 0, 0, 0},
|
||||
{65424, 0, 0, 0},
|
||||
{65408, 0, 0, 0},
|
||||
{65410, 0, 0, 0},
|
||||
{58019, 57987, 0, 0},
|
||||
{57153, 57121, 0, 0},
|
||||
{57274, 57242, 0, 0},
|
||||
{28, 0, 0, 0},
|
||||
{0, 65508, 0, 0},
|
||||
{16, 0, 0, 0},
|
||||
{0, 65520, 0, 0},
|
||||
{26, 0, 0, 0},
|
||||
{0, 65510, 0, 0},
|
||||
{54793, 0, 0, 0},
|
||||
{61722, 0, 0, 0},
|
||||
{54809, 0, 0, 0},
|
||||
{0, 54741, 0, 0},
|
||||
{0, 54744, 0, 0},
|
||||
{54756, 0, 0, 0},
|
||||
{54787, 0, 0, 0},
|
||||
{54753, 0, 0, 0},
|
||||
{54754, 0, 0, 0},
|
||||
{54721, 0, 0, 0},
|
||||
{0, 58272, 0, 0},
|
||||
{30204, 0, 0, 0},
|
||||
{23256, 0, 0, 0},
|
||||
{23228, 0, 0, 0},
|
||||
{23217, 0, 0, 0},
|
||||
{23221, 0, 0, 0},
|
||||
{23231, 0, 0, 0},
|
||||
{23278, 0, 0, 0},
|
||||
{23254, 0, 0, 0},
|
||||
{23275, 0, 0, 0},
|
||||
{928, 0, 0, 0},
|
||||
{0, 64608, 0, 0},
|
||||
{26672, 0, 0, 0},
|
||||
};
|
||||
|
||||
const uint8_t unicode::folding_index1[] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 22, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 23, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 0, 0, 26, 27, 28, 26, 29, 30,
|
||||
31, 32, 0, 0, 0, 0, 33, 34, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 36, 37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 39, 26, 40,
|
||||
41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42,
|
||||
43, 0, 44, 45, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 47, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 50, 0, 0,
|
||||
};
|
||||
|
||||
const uint8_t unicode::folding_index2[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1,
|
||||
1, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5,
|
||||
4, 4, 4, 4, 4, 4, 4, 6, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 8,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 9, 4, 4, 4, 4, 4, 10, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4,
|
||||
4, 4, 4, 11, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 0, 0,
|
||||
12, 13, 12, 13, 12, 13, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12,
|
||||
13, 12, 13, 12, 13, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 14, 12,
|
||||
13, 12, 13, 12, 13, 15, 16, 17, 12, 13, 12, 13, 18, 12, 13, 19, 19, 12,
|
||||
13, 0, 20, 21, 22, 12, 13, 19, 23, 24, 25, 26, 12, 13, 27, 0, 25, 28,
|
||||
29, 30, 12, 13, 12, 13, 12, 13, 31, 12, 13, 31, 0, 0, 12, 13, 31, 12,
|
||||
13, 32, 32, 12, 13, 12, 13, 33, 12, 13, 0, 0, 12, 13, 0, 34, 0, 0,
|
||||
0, 0, 35, 36, 37, 35, 36, 37, 35, 36, 37, 12, 13, 12, 13, 12, 13, 12,
|
||||
13, 12, 13, 12, 13, 12, 13, 12, 13, 38, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 0, 35, 36, 37, 12, 13, 39, 40,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 41, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 0, 0, 0, 0, 0, 0, 42, 12, 13, 43, 44, 45,
|
||||
45, 12, 13, 46, 47, 48, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 49, 50,
|
||||
51, 52, 53, 0, 54, 54, 0, 55, 0, 56, 57, 0, 0, 0, 54, 58, 0, 59,
|
||||
0, 60, 61, 0, 62, 63, 0, 64, 65, 0, 0, 63, 0, 66, 67, 0, 0, 68,
|
||||
0, 0, 0, 0, 0, 0, 0, 69, 0, 0, 70, 0, 0, 70, 0, 0, 0, 71,
|
||||
70, 72, 73, 73, 74, 0, 0, 0, 0, 0, 75, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 76, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 12, 13,
|
||||
0, 0, 12, 13, 0, 0, 0, 29, 29, 29, 0, 79, 0, 0, 0, 0, 0, 0,
|
||||
80, 0, 81, 81, 81, 0, 82, 0, 83, 83, 0, 1, 84, 1, 1, 85, 1, 1,
|
||||
86, 87, 88, 1, 89, 1, 1, 1, 90, 91, 0, 92, 1, 1, 93, 1, 1, 94,
|
||||
1, 1, 95, 96, 96, 96, 0, 4, 97, 4, 4, 98, 4, 4, 99, 100, 101, 4,
|
||||
102, 4, 4, 4, 103, 104, 105, 106, 4, 4, 107, 4, 4, 108, 4, 4, 109, 110,
|
||||
110, 111, 112, 113, 0, 0, 0, 114, 115, 116, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 117, 118,
|
||||
119, 120, 121, 122, 0, 12, 13, 123, 12, 13, 0, 41, 41, 41, 124, 124, 124, 124,
|
||||
124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125,
|
||||
125, 125, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 126, 12, 13, 12, 13, 12, 13, 12, 13, 12,
|
||||
13, 12, 13, 12, 13, 127, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 0, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
|
||||
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
|
||||
129, 129, 129, 129, 129, 129, 129, 129, 129, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130,
|
||||
130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130,
|
||||
130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 0, 130, 0, 0, 0, 0, 0, 130,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 131, 131, 131, 131, 131, 131, 131, 131,
|
||||
131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131,
|
||||
131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131,
|
||||
131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131,
|
||||
131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131,
|
||||
132, 132, 132, 132, 132, 132, 0, 0, 133, 133, 133, 133, 133, 133, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 134, 0, 0, 0, 135, 0, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 136, 137, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 0, 0, 0, 0,
|
||||
0, 138, 0, 0, 139, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 132, 132, 132, 132, 132, 132, 132, 132, 133, 133, 133, 133, 133, 133, 133, 133,
|
||||
132, 132, 132, 132, 132, 132, 0, 0, 133, 133, 133, 133, 133, 133, 0, 0, 132, 132,
|
||||
132, 132, 132, 132, 132, 132, 133, 133, 133, 133, 133, 133, 133, 133, 132, 132, 132, 132,
|
||||
132, 132, 132, 132, 133, 133, 133, 133, 133, 133, 133, 133, 132, 132, 132, 132, 132, 132,
|
||||
0, 0, 133, 133, 133, 133, 133, 133, 0, 0, 0, 132, 0, 132, 0, 132, 0, 132,
|
||||
0, 133, 0, 133, 0, 133, 0, 133, 132, 132, 132, 132, 132, 132, 132, 132, 133, 133,
|
||||
133, 133, 133, 133, 133, 133, 140, 140, 141, 141, 141, 141, 142, 142, 143, 143, 144, 144,
|
||||
145, 145, 0, 0, 132, 132, 132, 132, 132, 132, 132, 132, 133, 133, 133, 133, 133, 133,
|
||||
133, 133, 132, 132, 132, 132, 132, 132, 132, 132, 133, 133, 133, 133, 133, 133, 133, 133,
|
||||
132, 132, 132, 132, 132, 132, 132, 132, 133, 133, 133, 133, 133, 133, 133, 133, 132, 132,
|
||||
0, 146, 0, 0, 0, 0, 133, 133, 147, 147, 148, 0, 149, 0, 0, 0, 0, 146,
|
||||
0, 0, 0, 0, 150, 150, 150, 150, 148, 0, 0, 0, 132, 132, 0, 0, 0, 0,
|
||||
0, 0, 133, 133, 151, 151, 0, 0, 0, 0, 132, 132, 0, 0, 0, 119, 0, 0,
|
||||
133, 133, 152, 152, 123, 0, 0, 0, 0, 0, 0, 146, 0, 0, 0, 0, 153, 153,
|
||||
154, 154, 148, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 155, 0, 0, 0, 156, 157, 0, 0, 0, 0,
|
||||
0, 0, 158, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 160, 160, 160, 160, 160, 160,
|
||||
160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 161, 161, 161, 161, 161, 161, 161, 161,
|
||||
161, 161, 161, 161, 161, 161, 161, 161, 0, 0, 0, 12, 13, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162,
|
||||
162, 162, 162, 162, 162, 162, 162, 162, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163,
|
||||
163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 0, 129, 129, 129, 129,
|
||||
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
|
||||
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
|
||||
129, 129, 129, 129, 129, 129, 129, 0, 12, 13, 164, 165, 166, 167, 168, 12, 13, 12,
|
||||
13, 12, 13, 169, 170, 171, 172, 0, 12, 13, 0, 12, 13, 0, 0, 0, 0, 0,
|
||||
0, 0, 173, 173, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 0, 0, 0, 0, 0, 0, 0, 12, 13, 12, 13, 0, 0, 0,
|
||||
12, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 174, 174, 174, 174,
|
||||
174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174,
|
||||
174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 0, 174,
|
||||
0, 0, 0, 0, 0, 174, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 0, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 12, 13, 12, 13, 175, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13,
|
||||
0, 0, 0, 12, 13, 176, 0, 0, 12, 13, 12, 13, 0, 0, 12, 13, 12, 13,
|
||||
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 177, 178,
|
||||
179, 180, 0, 0, 181, 182, 183, 184, 12, 13, 12, 13, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 185, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 186, 186, 186, 186,
|
||||
186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186,
|
||||
186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186,
|
||||
186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186,
|
||||
186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186,
|
||||
186, 186, 186, 186, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
|
||||
|
@ -234,6 +234,55 @@ CanLowerCase(char16_t ch)
|
||||
return CharInfo(ch).lowerCase != 0;
|
||||
}
|
||||
|
||||
/*
 * One record of the generated case-folding table (js_foldinfo).
 *
 * Every field is a 16-bit delta stored modulo 2^16: adding it to a UTF-16
 * code unit and truncating the sum to 16 bits yields the related code unit.
 * A delta of 0 therefore means "no mapping" (the code unit maps to itself).
 */
class FoldingInfo {
  public:
    // Delta from a code unit to its simple case folding.
    uint16_t folding;

    // Deltas from a code unit to up to three other code units that share its
    // case folding; unused slots are 0.
    uint16_t reverse1;
    uint16_t reverse2;
    uint16_t reverse3;
};
|
||||
|
||||
extern const uint8_t folding_index1[];
|
||||
extern const uint8_t folding_index2[];
|
||||
extern const FoldingInfo js_foldinfo[];
|
||||
|
||||
inline const FoldingInfo&
|
||||
CaseFoldInfo(char16_t code)
|
||||
{
|
||||
const size_t shift = 6;
|
||||
size_t index = folding_index1[code >> shift];
|
||||
index = folding_index2[(index << shift) + (code & ((1 << shift) - 1))];
|
||||
return js_foldinfo[index];
|
||||
}
|
||||
|
||||
inline char16_t
|
||||
FoldCase(char16_t ch)
|
||||
{
|
||||
const FoldingInfo& info = CaseFoldInfo(ch);
|
||||
return uint16_t(ch) + info.folding;
|
||||
}
|
||||
|
||||
inline char16_t
|
||||
ReverseFoldCase1(char16_t ch)
|
||||
{
|
||||
const FoldingInfo& info = CaseFoldInfo(ch);
|
||||
return uint16_t(ch) + info.reverse1;
|
||||
}
|
||||
|
||||
inline char16_t
|
||||
ReverseFoldCase2(char16_t ch)
|
||||
{
|
||||
const FoldingInfo& info = CaseFoldInfo(ch);
|
||||
return uint16_t(ch) + info.reverse2;
|
||||
}
|
||||
|
||||
inline char16_t
|
||||
ReverseFoldCase3(char16_t ch)
|
||||
{
|
||||
const FoldingInfo& info = CaseFoldInfo(ch);
|
||||
return uint16_t(ch) + info.reverse3;
|
||||
}
|
||||
|
||||
const size_t LeadSurrogateMin = 0xD800;
|
||||
const size_t LeadSurrogateMax = 0xDBFF;
|
||||
const size_t TrailSurrogateMin = 0xDC00;
|
||||
|
@ -84,13 +84,33 @@ def read_unicode_data(unicode_file):
|
||||
row[0] = int(row[0], 16)
|
||||
yield row
|
||||
|
||||
def generate_unicode_stuff(unicode_data, data_file, test_mapping, test_space):
|
||||
def read_case_folding(case_folding):
    """Yield the simple case-folding rows of a CaseFolding.txt-style file.

    case_folding is an iterable of text lines in the form
    'CODE; STATUS; MAPPING; # NAME'. Blank lines and '#' comment lines are
    skipped, as are rows whose status is 'F' or 'T'.

    Each yielded row is the list produced by splitting the line on '; ',
    with row[0] (code) and row[2] (mapping) converted from hexadecimal
    strings to ints. The remaining fields are left as read.
    """
    for line in case_folding:
        # Strip only for the skip test so that '\r\n' or whitespace-only
        # lines are treated as blank; the row itself is still built from the
        # untouched line, keeping the yielded fields exactly as before.
        stripped = line.strip()
        if not stripped or stripped.startswith('#'):
            continue

        row = line.split('; ')
        # 'F' and 'T' rows are dropped before conversion: an 'F' mapping can
        # hold several code points (e.g. '0073 0073') and would not parse as
        # a single hexadecimal number.
        if row[1] in ('F', 'T'):
            continue

        row[0] = int(row[0], 16)
        row[2] = int(row[2], 16)
        yield row
|
||||
|
||||
def generate_unicode_stuff(unicode_data, case_folding,
|
||||
data_file, test_mapping, test_space, test_icase):
|
||||
dummy = (0, 0, 0)
|
||||
table = [dummy]
|
||||
cache = {dummy: 0}
|
||||
index = [0] * (MAX + 1)
|
||||
folding_map = {}
|
||||
rev_folding_map = {}
|
||||
folding_dummy = (0, 0, 0, 0)
|
||||
folding_table = [folding_dummy]
|
||||
folding_cache = {folding_dummy: 0}
|
||||
folding_index = [0] * (MAX + 1)
|
||||
test_table = {}
|
||||
test_space_table = []
|
||||
folding_tests = []
|
||||
folding_codes = set()
|
||||
|
||||
for row in read_unicode_data(unicode_data):
|
||||
code = row[0]
|
||||
@ -143,6 +163,64 @@ def generate_unicode_stuff(unicode_data, data_file, test_mapping, test_space):
|
||||
table.append(item)
|
||||
index[code] = i
|
||||
|
||||
for row in read_case_folding(case_folding):
|
||||
code = row[0]
|
||||
mapping = row[2]
|
||||
folding_map[code] = mapping
|
||||
|
||||
if mapping not in rev_folding_map:
|
||||
rev_folding_map[mapping] = [code]
|
||||
else:
|
||||
rev_folding_map[mapping].append(code)
|
||||
|
||||
folding_codes.add(code)
|
||||
folding_codes.add(mapping)
|
||||
|
||||
for code in sorted(folding_codes):
|
||||
if code > MAX:
|
||||
continue
|
||||
|
||||
if code in folding_map:
|
||||
folding = folding_map[code]
|
||||
else:
|
||||
folding = code
|
||||
|
||||
if code in rev_folding_map:
|
||||
rev_folding = rev_folding_map[code]
|
||||
elif folding in rev_folding_map:
|
||||
rev_folding = [c for c in rev_folding_map[folding] if c != code]
|
||||
else:
|
||||
rev_folding = []
|
||||
|
||||
assert len(rev_folding) <= 3
|
||||
|
||||
if folding != code or len(rev_folding):
|
||||
item = [code]
|
||||
if folding != code:
|
||||
item.append(folding)
|
||||
folding_tests.append(item + rev_folding)
|
||||
|
||||
folding_d = folding - code
|
||||
rev_folding_ds = [v - code for v in rev_folding]
|
||||
|
||||
assert folding_d > -65535 and folding_d < 65535
|
||||
assert all([v > -65535 and v < 65535 for v in rev_folding])
|
||||
|
||||
folding = folding_d & 0xffff
|
||||
rev_folding = [v & 0xffff for v in rev_folding_ds]
|
||||
rev_folding_0 = rev_folding[0] if len(rev_folding) >= 1 else 0
|
||||
rev_folding_1 = rev_folding[1] if len(rev_folding) >= 2 else 0
|
||||
rev_folding_2 = rev_folding[2] if len(rev_folding) >= 3 else 0
|
||||
|
||||
item = (folding, rev_folding_0, rev_folding_1, rev_folding_2)
|
||||
|
||||
i = folding_cache.get(item)
|
||||
if i is None:
|
||||
assert item not in folding_table
|
||||
folding_cache[item] = i = len(folding_table)
|
||||
folding_table.append(item)
|
||||
folding_index[code] = i
|
||||
|
||||
test_mapping.write('/* Generated by make_unicode.py DO NOT MODIFY */\n')
|
||||
test_mapping.write(public_domain)
|
||||
test_mapping.write('var mapping = [\n')
|
||||
@ -180,6 +258,29 @@ assertEq((onlySpace + 'aaaa').trim(), 'aaaa');
|
||||
assertEq(('aaaa' + onlySpace).trim(), 'aaaa');
|
||||
assertEq((onlySpace + 'aaaa' + onlySpace).trim(), 'aaaa');
|
||||
|
||||
if (typeof reportCompare === "function")
|
||||
reportCompare(true, true);
|
||||
""")
|
||||
|
||||
test_icase.write('/* Generated by make_unicode.py DO NOT MODIFY */\n')
|
||||
test_icase.write(public_domain)
|
||||
test_icase.write("""
|
||||
var BUGNUMBER = 1135377;
|
||||
var summary = "Implement RegExp unicode flag -- ignoreCase flag.";
|
||||
|
||||
print(BUGNUMBER + ": " + summary);
|
||||
|
||||
function test(code, ...equivs) {
|
||||
var codeRe = new RegExp(String.fromCodePoint(code) + "+", "iu");
|
||||
var ans = String.fromCodePoint(code) + equivs.map(c => String.fromCodePoint(c)).join("");
|
||||
assertEqArray(codeRe.exec("<" + ans + ">"), [ans]);
|
||||
codeRe = new RegExp("[" + String.fromCodePoint(code) + "]+", "iu");
|
||||
assertEqArray(codeRe.exec("<" + ans + ">"), [ans]);
|
||||
}
|
||||
""")
|
||||
for args in folding_tests:
|
||||
test_icase.write('test(' + ','.join([hex(c) for c in args]) + ');\n')
|
||||
test_icase.write("""
|
||||
if (typeof reportCompare === "function")
|
||||
reportCompare(true, true);
|
||||
""")
|
||||
@ -189,6 +290,11 @@ if (typeof reportCompare === "function")
|
||||
# Don't forget to update CharInfo in Unicode.cpp if you need to change this
|
||||
assert shift == 5
|
||||
|
||||
folding_index1, folding_index2, folding_shift = splitbins(folding_index)
|
||||
|
||||
# Don't forget to update CharInfo in Unicode.cpp if you need to change this
|
||||
assert folding_shift == 6
|
||||
|
||||
# verify correctness
|
||||
for char in index:
|
||||
test = table[index[char]]
|
||||
@ -198,6 +304,14 @@ if (typeof reportCompare === "function")
|
||||
|
||||
assert test == table[idx]
|
||||
|
||||
# verify correctness
|
||||
for char in folding_index:
|
||||
test = folding_table[folding_index[char]]
|
||||
|
||||
idx = folding_index1[char >> folding_shift]
|
||||
idx = folding_index2[(idx << folding_shift) + (char & ((1 << folding_shift) - 1))]
|
||||
|
||||
assert test == folding_table[idx]
|
||||
|
||||
comment = """
|
||||
/*
|
||||
@ -284,6 +398,19 @@ if (typeof reportCompare === "function")
|
||||
dump(index2, 'index2', data_file)
|
||||
data_file.write('\n')
|
||||
|
||||
data_file.write('const FoldingInfo unicode::js_foldinfo[] = {\n')
|
||||
for d in folding_table:
|
||||
data_file.write(' {')
|
||||
data_file.write(', '.join((str(e) for e in d)))
|
||||
data_file.write('},\n')
|
||||
data_file.write('};\n')
|
||||
data_file.write('\n')
|
||||
|
||||
dump(folding_index1, 'folding_index1', data_file)
|
||||
data_file.write('\n')
|
||||
dump(folding_index2, 'folding_index2', data_file)
|
||||
data_file.write('\n')
|
||||
|
||||
data_file.write('\n')
|
||||
|
||||
def getsize(data):
|
||||
@ -362,7 +489,7 @@ if __name__ == '__main__':
|
||||
print('Always make sure you have the newest UnicodeData.txt!')
|
||||
unicode_data = open(sys.argv[1], 'r')
|
||||
else:
|
||||
print('Downloading...')
|
||||
print('Downloading UnicodeData.txt...')
|
||||
reader = urllib2.urlopen('http://unicode.org/Public/UNIDATA/UnicodeData.txt')
|
||||
data = reader.read()
|
||||
reader.close()
|
||||
@ -370,8 +497,21 @@ if __name__ == '__main__':
|
||||
unicode_data.write(data)
|
||||
unicode_data.seek(0)
|
||||
|
||||
if len(sys.argv) > 2:
|
||||
print('Always make sure you have the newest CaseFolding.txt!')
|
||||
case_folding = open(sys.argv[2], 'r')
|
||||
else:
|
||||
print('Downloading CaseFolding.txt...')
|
||||
reader = urllib2.urlopen('http://unicode.org/Public/UNIDATA/CaseFolding.txt')
|
||||
data = reader.read()
|
||||
reader.close()
|
||||
case_folding = open('CaseFolding.txt', 'w+')
|
||||
case_folding.write(data)
|
||||
case_folding.seek(0)
|
||||
|
||||
print('Generating...')
|
||||
generate_unicode_stuff(unicode_data,
|
||||
generate_unicode_stuff(unicode_data, case_folding,
|
||||
open('Unicode.cpp', 'w'),
|
||||
open('../tests/ecma_5/String/string-upper-lower-mapping.js', 'w'),
|
||||
open('../tests/ecma_5/String/string-space-trim.js', 'w'))
|
||||
open('../tests/ecma_5/String/string-space-trim.js', 'w'),
|
||||
open('../tests/ecma_6/RegExp/unicode-ignoreCase.js', 'w'))
|
||||
|
Loading…
x
Reference in New Issue
Block a user