From cf744e9d355c342dc18b4f930e70e372b457f96e Mon Sep 17 00:00:00 2001 From: Tooru Fujisawa Date: Fri, 7 Aug 2015 08:12:01 +0900 Subject: [PATCH] #393, Bug 1135377 - Part 6: Support ignoreCase for BMP in RegExp with unicode flag. r=till, f=anba --- .../irregexp/NativeRegExpMacroAssembler.cpp | 18 +- .../NativeRegExpMacroAssembler.cpp.rej | 50 + js/src/irregexp/NativeRegExpMacroAssembler.h | 2 +- js/src/irregexp/RegExpBytecode.h | 3 +- js/src/irregexp/RegExpEngine.cpp | 191 +- js/src/irregexp/RegExpEngine.h | 27 +- js/src/irregexp/RegExpInterpreter.cpp | 21 + js/src/irregexp/RegExpMacroAssembler.cpp | 41 +- js/src/irregexp/RegExpMacroAssembler.h | 10 +- js/src/irregexp/RegExpParser.cpp | 38 +- js/src/irregexp/RegExpParser.h | 6 +- .../ecma_6/RegExp/unicode-ignoreCase-ascii.js | 45 + .../RegExp/unicode-ignoreCase-escape.js | 39 + .../RegExp/unicode-ignoreCase-negated.js | 19 + .../tests/ecma_6/RegExp/unicode-ignoreCase.js | 2245 +++++++++++++++++ js/src/vm/CaseFolding.txt | 1414 +++++++++++ js/src/vm/RegExpObject.cpp | 4 +- js/src/vm/Unicode.cpp | 435 ++++ js/src/vm/Unicode.h | 49 + js/src/vm/make_unicode.py | 148 +- 20 files changed, 4708 insertions(+), 97 deletions(-) create mode 100644 js/src/irregexp/NativeRegExpMacroAssembler.cpp.rej create mode 100644 js/src/tests/ecma_6/RegExp/unicode-ignoreCase-ascii.js create mode 100644 js/src/tests/ecma_6/RegExp/unicode-ignoreCase-escape.js create mode 100644 js/src/tests/ecma_6/RegExp/unicode-ignoreCase-negated.js create mode 100644 js/src/tests/ecma_6/RegExp/unicode-ignoreCase.js create mode 100644 js/src/vm/CaseFolding.txt diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.cpp b/js/src/irregexp/NativeRegExpMacroAssembler.cpp index 280bfe9a2..1d8a5425b 100644 --- a/js/src/irregexp/NativeRegExpMacroAssembler.cpp +++ b/js/src/irregexp/NativeRegExpMacroAssembler.cpp @@ -783,9 +783,10 @@ NativeRegExpMacroAssembler::CheckGreedyLoop(Label* on_tos_equals_current_positio } void 
-NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, Label* on_no_match) +NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, Label* on_no_match, + bool unicode) { - JitSpew(SPEW_PREFIX "CheckNotBackReference(%d)", start_reg); + JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode); Label fallthrough; Label success; @@ -1034,8 +1035,13 @@ NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label masm.passABIArg(current_character); masm.passABIArg(current_position); masm.passABIArg(temp1); - int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings; - masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); + if (!unicode) { + int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings; + masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); + } else { + int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareUCStrings; + masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); + } masm.storeCallResult(temp0); masm.PopRegsInMask(volatileRegs); @@ -1047,7 +1053,9 @@ NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label // PowerPC specific version, somewhat more efficient (fixes issue 308) Register ppc0 = (temp1 == r6) ? r7 : r6; Register ppc1 = (temp1 == r8) ? r9 : r8; - int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings; + int (*fun)(const char16_t*, const char16_t*, size_t) = (unicode) + ? CaseInsensitiveCompareUCStrings + : CaseInsensitiveCompareStrings ; // This is lazy, but only incurs one extra x_subi. 
masm.x_mflr(r0); diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.cpp.rej b/js/src/irregexp/NativeRegExpMacroAssembler.cpp.rej new file mode 100644 index 000000000..c0fc9b38c --- /dev/null +++ b/js/src/irregexp/NativeRegExpMacroAssembler.cpp.rej @@ -0,0 +1,50 @@ +diff a/js/src/irregexp/NativeRegExpMacroAssembler.cpp b/js/src/irregexp/NativeRegExpMacroAssembler.cpp (rejected hunks) +@@ -714,19 +714,20 @@ NativeRegExpMacroAssembler::CheckNotBack + + // Restore backtrack stack pointer. + masm.pop(backtrack_stack_pointer); + + masm.bind(&fallthrough); + } + + void +-NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label* on_no_match) ++NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label* on_no_match, ++ bool unicode) + { +- JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d)", start_reg); ++ JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode); + + Label fallthrough; + + masm.loadPtr(register_location(start_reg), current_character); // Index of start of capture + masm.loadPtr(register_location(start_reg + 1), temp1); // Index of end of capture + masm.subPtr(current_character, temp1); // Length of capture. + + // The length of a capture should not be negative. This can only happen +@@ -828,18 +829,23 @@ NativeRegExpMacroAssembler::CheckNotBack + // Parameters are + // Address byte_offset1 - Address captured substring's start. + // Address byte_offset2 - Address of current character position. + // size_t byte_length - length of capture in bytes(!) 
+ masm.setupUnalignedABICall(temp0); + masm.passABIArg(current_character); + masm.passABIArg(current_position); + masm.passABIArg(temp1); +- int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings; +- masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); ++ if (!unicode) { ++ int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings; ++ masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); ++ } else { ++ int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareUCStrings; ++ masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); ++ } + masm.storeCallResult(temp0); + + masm.PopRegsInMask(volatileRegs); + + // Check if function returned non-zero for success or zero for failure. + masm.branchTest32(Assembler::Zero, temp0, temp0, BranchOrBacktrack(on_no_match)); + + // On success, increment position by length of capture. diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.h b/js/src/irregexp/NativeRegExpMacroAssembler.h index 996e4e6d2..4b2674c4c 100644 --- a/js/src/irregexp/NativeRegExpMacroAssembler.h +++ b/js/src/irregexp/NativeRegExpMacroAssembler.h @@ -104,7 +104,7 @@ class MOZ_STACK_CLASS NativeRegExpMacroAssembler : public RegExpMacroAssembler void CheckGreedyLoop(jit::Label* on_tos_equals_current_position); void CheckNotAtStart(jit::Label* on_not_at_start); void CheckNotBackReference(int start_reg, jit::Label* on_no_match); - void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match); + void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, bool unicode); void CheckNotCharacter(unsigned c, jit::Label* on_not_equal); void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal); void CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with, diff --git a/js/src/irregexp/RegExpBytecode.h b/js/src/irregexp/RegExpBytecode.h index da035c7b5..f31b78c59 100644 --- a/js/src/irregexp/RegExpBytecode.h +++ 
b/js/src/irregexp/RegExpBytecode.h @@ -90,7 +90,8 @@ V(CHECK_AT_START, 43, 8) /* bc8 pad24 addr32 */ \ V(CHECK_NOT_AT_START, 44, 8) /* bc8 pad24 addr32 */ \ V(CHECK_GREEDY, 45, 8) /* bc8 pad24 addr32 */ \ V(ADVANCE_CP_AND_GOTO, 46, 8) /* bc8 offset24 addr32 */ \ -V(SET_CURRENT_POSITION_FROM_END, 47, 4) /* bc8 idx24 */ +V(SET_CURRENT_POSITION_FROM_END, 47, 4) /* bc8 idx24 */ \ +V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 48, 8) /* bc8 reg_idx24 addr32 */ #define DECLARE_BYTECODES(name, code, length) \ static const int BC_##name = code; diff --git a/js/src/irregexp/RegExpEngine.cpp b/js/src/irregexp/RegExpEngine.cpp index 53496792f..82bd34881 100644 --- a/js/src/irregexp/RegExpEngine.cpp +++ b/js/src/irregexp/RegExpEngine.cpp @@ -82,11 +82,26 @@ static const int kSpaceAndSurrogateRangeCount = ArrayLength(kSpaceAndSurrogateRa static const int kWordRanges[] = { '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 }; static const int kWordRangeCount = ArrayLength(kWordRanges); +static const int kIgnoreCaseWordRanges[] = { + '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, + 0x017F, 0x017F + 1, 0x212A, 0x212A + 1, + 0x10000 }; +static const int kIgnoreCaseWordCount = ArrayLength(kIgnoreCaseWordRanges); static const int kWordAndSurrogateRanges[] = { '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, unicode::LeadSurrogateMin, unicode::TrailSurrogateMax + 1, 0x10000 }; static const int kWordAndSurrogateRangeCount = ArrayLength(kWordAndSurrogateRanges); +static const int kNegatedIgnoreCaseWordAndSurrogateRanges[] = { + 0, '0', '9' + 1, 'A', + 'K', 'K' + 1, 'S', 'S' + 1, + 'Z' + 1, '_', '_' + 1, 'a', + 'k', 'k' + 1, 's', 's' + 1, + 'z' + 1, unicode::LeadSurrogateMin, + unicode::TrailSurrogateMax + 1, 0x10000, + 0x10000 }; +static const int kNegatedIgnoreCaseWordAndSurrogateRangeCount = + ArrayLength(kNegatedIgnoreCaseWordAndSurrogateRanges); static const int kDigitRanges[] = { '0', '9' + 1, 0x10000 }; static const int kDigitRangeCount = 
ArrayLength(kDigitRanges); static const int kDigitAndSurrogateRanges[] = { @@ -186,14 +201,29 @@ CharacterRange::AddClassEscape(LifoAlloc* alloc, char16_t type, // Add class escape, excluding surrogate pair range. void CharacterRange::AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type, - CharacterRangeVector* ranges) + CharacterRangeVector* ranges, bool ignore_case) { switch (type) { + case 's': + case 'd': + return AddClassEscape(alloc, type, ranges); + break; case 'S': AddClassNegated(kSpaceAndSurrogateRanges, kSpaceAndSurrogateRangeCount, ranges); break; + case 'w': + if (ignore_case) + AddClass(kIgnoreCaseWordRanges, kIgnoreCaseWordCount, ranges); + else + AddClassEscape(alloc, type, ranges); + break; case 'W': - AddClassNegated(kWordAndSurrogateRanges, kWordAndSurrogateRangeCount, ranges); + if (ignore_case) { + AddClass(kNegatedIgnoreCaseWordAndSurrogateRanges, + kNegatedIgnoreCaseWordAndSurrogateRangeCount, ranges); + } else { + AddClassNegated(kWordAndSurrogateRanges, kWordAndSurrogateRangeCount, ranges); + } break; case 'D': AddClassNegated(kDigitAndSurrogateRanges, kDigitAndSurrogateRangeCount, ranges); @@ -203,20 +233,39 @@ CharacterRange::AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type, } } +#define FOR_EACH_NON_ASCII_TO_ASCII_FOLDING(macro) \ + /* LATIN CAPITAL LETTER Y WITH DIAERESIS */ \ + macro(0x0178, 0x00FF) \ + /* LATIN SMALL LETTER LONG S */ \ + macro(0x017F, 0x0073) \ + /* LATIN CAPITAL LETTER SHARP S */ \ + macro(0x1E9E, 0x00DF) \ + /* KELVIN SIGN */ \ + macro(0x212A, 0x006B) \ + /* ANGSTROM SIGN */ \ + macro(0x212B, 0x00E5) + // We need to check for the following characters: 0x39c 0x3bc 0x178. static inline bool -RangeContainsLatin1Equivalents(CharacterRange range) +RangeContainsLatin1Equivalents(CharacterRange range, bool unicode) { - // TODO(dcarney): this could be a lot more efficient. + /* TODO(dcarney): this could be a lot more efficient. 
*/ + if (unicode) { +#define CHECK_RANGE(C, F) \ + if (range.Contains(C)) return true; +FOR_EACH_NON_ASCII_TO_ASCII_FOLDING(CHECK_RANGE) +#undef CHECK_RANGE + } + return range.Contains(0x39c) || range.Contains(0x3bc) || range.Contains(0x178); } static bool -RangesContainLatin1Equivalents(const CharacterRangeVector& ranges) +RangesContainLatin1Equivalents(const CharacterRangeVector& ranges, bool unicode) { for (size_t i = 0; i < ranges.length(); i++) { // TODO(dcarney): this could be a lot more efficient. - if (RangeContainsLatin1Equivalents(ranges[i])) + if (RangeContainsLatin1Equivalents(ranges[i], unicode)) return true; } return false; @@ -229,27 +278,24 @@ static const size_t kEcma262UnCanonicalizeMaxWidth = 4; static int GetCaseIndependentLetters(char16_t character, bool ascii_subject, + bool unicode, + const char16_t* choices, + size_t choices_length, char16_t* letters) { - const char16_t choices[] = { - character, - unicode::ToLowerCase(character), - unicode::ToUpperCase(character) - }; - size_t count = 0; - for (size_t i = 0; i < ArrayLength(choices); i++) { + for (size_t i = 0; i < choices_length; i++) { char16_t c = choices[i]; // The standard requires that non-ASCII characters cannot have ASCII // character codes in their equivalence class, even though this // situation occurs multiple times in the unicode tables. static const unsigned kMaxAsciiCharCode = 127; - if (character > kMaxAsciiCharCode && c <= kMaxAsciiCharCode) + if (!unicode && character > kMaxAsciiCharCode && c <= kMaxAsciiCharCode) continue; // Skip characters that can't appear in one byte strings. - if (ascii_subject && c > kMaxOneByteCharCode) + if (!unicode && ascii_subject && c > kMaxOneByteCharCode) continue; // Watch for duplicates. 
@@ -269,10 +315,45 @@ GetCaseIndependentLetters(char16_t character, return count; } +static int +GetCaseIndependentLetters(char16_t character, + bool ascii_subject, + bool unicode, + char16_t* letters) +{ + if (unicode) { + const char16_t choices[] = { + character, + unicode::FoldCase(character), + unicode::ReverseFoldCase1(character), + unicode::ReverseFoldCase2(character), + unicode::ReverseFoldCase3(character), + }; + return GetCaseIndependentLetters(character, ascii_subject, unicode, + choices, ArrayLength(choices), letters); + } + + const char16_t choices[] = { + character, + unicode::ToLowerCase(character), + unicode::ToUpperCase(character) + }; + return GetCaseIndependentLetters(character, ascii_subject, unicode, + choices, ArrayLength(choices), letters); +} + static char16_t -ConvertNonLatin1ToLatin1(char16_t c) +ConvertNonLatin1ToLatin1(char16_t c, bool unicode) { MOZ_ASSERT(c > kMaxOneByteCharCode); + if (unicode) { + switch (c) { +#define CONVERT(C, F) case C: return F; +FOR_EACH_NON_ASCII_TO_ASCII_FOLDING(CONVERT) +#undef CONVERT + } + } + switch (c) { // This are equivalent characters in unicode. 
case 0x39c: @@ -287,12 +368,12 @@ ConvertNonLatin1ToLatin1(char16_t c) } void -CharacterRange::AddCaseEquivalents(bool is_ascii, CharacterRangeVector* ranges) +CharacterRange::AddCaseEquivalents(bool is_ascii, bool unicode, CharacterRangeVector* ranges) { char16_t bottom = from(); char16_t top = to(); - if (is_ascii && !RangeContainsLatin1Equivalents(*this)) { + if (is_ascii && !RangeContainsLatin1Equivalents(*this, unicode)) { if (bottom > kMaxOneByteCharCode) return; if (top > kMaxOneByteCharCode) @@ -301,7 +382,7 @@ CharacterRange::AddCaseEquivalents(bool is_ascii, CharacterRangeVector* ranges) for (char16_t c = bottom;; c++) { char16_t chars[kEcma262UnCanonicalizeMaxWidth]; - size_t length = GetCaseIndependentLetters(c, is_ascii, chars); + size_t length = GetCaseIndependentLetters(c, is_ascii, unicode, chars); for (size_t i = 0; i < length; i++) { char16_t other = chars[i]; @@ -581,7 +662,7 @@ SeqRegExpNode::FillInBMInfo(int offset, } RegExpNode* -SeqRegExpNode::FilterASCII(int depth, bool ignore_case) +SeqRegExpNode::FilterASCII(int depth, bool ignore_case, bool unicode) { if (info()->replacement_calculated) return replacement(); @@ -591,13 +672,13 @@ SeqRegExpNode::FilterASCII(int depth, bool ignore_case) MOZ_ASSERT(!info()->visited); VisitMarker marker(info()); - return FilterSuccessor(depth - 1, ignore_case); + return FilterSuccessor(depth - 1, ignore_case, unicode); } RegExpNode* -SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) +SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case, bool unicode) { - RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case); + RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case, unicode); if (next == nullptr) return set_replacement(nullptr); @@ -740,7 +821,7 @@ TextNode::GreedyLoopTextLength() } RegExpNode* -TextNode::FilterASCII(int depth, bool ignore_case) +TextNode::FilterASCII(int depth, bool ignore_case, bool unicode) { if (info()->replacement_calculated) return replacement(); 
@@ -764,7 +845,7 @@ TextNode::FilterASCII(int depth, bool ignore_case) // Here, we need to check for characters whose upper and lower cases // are outside the Latin-1 range. - char16_t converted = ConvertNonLatin1ToLatin1(c); + char16_t converted = ConvertNonLatin1ToLatin1(c, unicode); if (converted == 0) { // Character is outside Latin-1 completely return set_replacement(nullptr); @@ -789,7 +870,7 @@ TextNode::FilterASCII(int depth, bool ignore_case) ranges[0].to() >= kMaxOneByteCharCode) { // This will be handled in a later filter. - if (ignore_case && RangesContainLatin1Equivalents(ranges)) + if (ignore_case && RangesContainLatin1Equivalents(ranges, unicode)) continue; return set_replacement(nullptr); } @@ -798,14 +879,14 @@ TextNode::FilterASCII(int depth, bool ignore_case) ranges[0].from() > kMaxOneByteCharCode) { // This will be handled in a later filter. - if (ignore_case && RangesContainLatin1Equivalents(ranges)) + if (ignore_case && RangesContainLatin1Equivalents(ranges, unicode)) continue; return set_replacement(nullptr); } } } } - return FilterSuccessor(depth - 1, ignore_case); + return FilterSuccessor(depth - 1, ignore_case, unicode); } void @@ -823,7 +904,7 @@ TextNode::CalculateOffsets() } } -void TextNode::MakeCaseIndependent(bool is_ascii) +void TextNode::MakeCaseIndependent(bool is_ascii, bool unicode) { int element_count = elements().length(); for (int i = 0; i < element_count; i++) { @@ -839,7 +920,7 @@ void TextNode::MakeCaseIndependent(bool is_ascii) CharacterRangeVector& ranges = cc->ranges(alloc()); int range_count = ranges.length(); for (int j = 0; j < range_count; j++) - ranges[j].AddCaseEquivalents(is_ascii, &ranges); + ranges[j].AddCaseEquivalents(is_ascii, unicode, &ranges); } } } @@ -988,7 +1069,7 @@ ChoiceNode::FillInBMInfo(int offset, } RegExpNode* -ChoiceNode::FilterASCII(int depth, bool ignore_case) +ChoiceNode::FilterASCII(int depth, bool ignore_case, bool unicode) { if (info()->replacement_calculated) return replacement(); @@ 
-1012,7 +1093,7 @@ ChoiceNode::FilterASCII(int depth, bool ignore_case) for (int i = 0; i < choice_count; i++) { GuardedAlternative alternative = alternatives()[i]; RegExpNode* replacement = - alternative.node()->FilterASCII(depth - 1, ignore_case); + alternative.node()->FilterASCII(depth - 1, ignore_case, unicode); MOZ_ASSERT(replacement != this); // No missing EMPTY_MATCH_CHECK. if (replacement != nullptr) { alternatives()[i].set_node(replacement); @@ -1033,7 +1114,7 @@ ChoiceNode::FilterASCII(int depth, bool ignore_case) new_alternatives.reserve(surviving); for (int i = 0; i < choice_count; i++) { RegExpNode* replacement = - alternatives()[i].node()->FilterASCII(depth - 1, ignore_case); + alternatives()[i].node()->FilterASCII(depth - 1, ignore_case, unicode); if (replacement != nullptr) { alternatives()[i].set_node(replacement); AutoEnterOOMUnsafeRegion oomUnsafe; @@ -1090,7 +1171,7 @@ NegativeLookaheadChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, } RegExpNode* -NegativeLookaheadChoiceNode::FilterASCII(int depth, bool ignore_case) +NegativeLookaheadChoiceNode::FilterASCII(int depth, bool ignore_case, bool unicode) { if (info()->replacement_calculated) return replacement(); @@ -1104,14 +1185,14 @@ NegativeLookaheadChoiceNode::FilterASCII(int depth, bool ignore_case) // Alternative 0 is the negative lookahead, alternative 1 is what comes // afterwards. RegExpNode* node = alternatives()[1].node(); - RegExpNode* replacement = node->FilterASCII(depth - 1, ignore_case); + RegExpNode* replacement = node->FilterASCII(depth - 1, ignore_case, unicode); if (replacement == nullptr) return set_replacement(nullptr); alternatives()[1].set_node(replacement); RegExpNode* neg_node = alternatives()[0].node(); - RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1, ignore_case); + RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1, ignore_case, unicode); // If the negative lookahead is always going to fail then // we don't need to check it. 
@@ -1192,7 +1273,7 @@ LoopChoiceNode::FillInBMInfo(int offset, } RegExpNode* -LoopChoiceNode::FilterASCII(int depth, bool ignore_case) +LoopChoiceNode::FilterASCII(int depth, bool ignore_case, bool unicode) { if (info()->replacement_calculated) return replacement(); @@ -1205,7 +1286,7 @@ LoopChoiceNode::FilterASCII(int depth, bool ignore_case) VisitMarker marker(info()); RegExpNode* continue_replacement = - continue_node_->FilterASCII(depth - 1, ignore_case); + continue_node_->FilterASCII(depth - 1, ignore_case, unicode); // If we can't continue after the loop then there is no sense in doing the // loop. @@ -1213,7 +1294,7 @@ LoopChoiceNode::FilterASCII(int depth, bool ignore_case) return set_replacement(nullptr); } - return ChoiceNode::FilterASCII(depth - 1, ignore_case); + return ChoiceNode::FilterASCII(depth - 1, ignore_case, unicode); } // ------------------------------------------------------------------- @@ -1242,7 +1323,7 @@ void Analysis::VisitText(TextNode* that) { if (ignore_case_) - that->MakeCaseIndependent(is_ascii_); + that->MakeCaseIndependent(is_ascii_, unicode_); EnsureAnalyzed(that->on_success()); if (!has_failed()) { that->CalculateOffsets(); @@ -1534,7 +1615,7 @@ class irregexp::RegExpCompiler { public: RegExpCompiler(JSContext* cx, LifoAlloc* alloc, int capture_count, - bool ignore_case, bool is_ascii, bool match_only); + bool ignore_case, bool is_ascii, bool match_only, bool unicode); int AllocateRegister() { if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { @@ -1571,6 +1652,7 @@ class irregexp::RegExpCompiler inline bool ignore_case() { return ignore_case_; } inline bool ascii() { return ascii_; } + inline bool unicode() { return unicode_; } FrequencyCollator* frequency_collator() { return &frequency_collator_; } int current_expansion_factor() { return current_expansion_factor_; } @@ -1592,6 +1674,7 @@ class irregexp::RegExpCompiler bool ignore_case_; bool ascii_; bool match_only_; + bool unicode_; bool reg_exp_too_big_; int 
current_expansion_factor_; FrequencyCollator frequency_collator_; @@ -1614,12 +1697,13 @@ class RecursionCheck // Attempts to compile the regexp using an Irregexp code generator. Returns // a fixed array or a null handle depending on whether it succeeded. RegExpCompiler::RegExpCompiler(JSContext* cx, LifoAlloc* alloc, int capture_count, - bool ignore_case, bool ascii, bool match_only) + bool ignore_case, bool ascii, bool match_only, bool unicode) : next_register_(2 * (capture_count + 1)), recursion_depth_(0), ignore_case_(ignore_case), ascii_(ascii), match_only_(match_only), + unicode_(unicode), reg_exp_too_big_(false), current_expansion_factor_(1), frequency_collator_(), @@ -1692,7 +1776,8 @@ IsNativeRegExpEnabled(JSContext* cx) RegExpCode irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData* data, HandleLinearString sample, bool is_global, bool ignore_case, - bool is_ascii, bool match_only, bool force_bytecode, bool sticky) + bool is_ascii, bool match_only, bool force_bytecode, bool sticky, + bool unicode) { if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { JS_ReportError(cx, "regexp too big"); @@ -1700,7 +1785,8 @@ irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData* } LifoAlloc& alloc = cx->tempLifoAlloc(); - RegExpCompiler compiler(cx, &alloc, data->capture_count, ignore_case, is_ascii, match_only); + RegExpCompiler compiler(cx, &alloc, data->capture_count, ignore_case, is_ascii, match_only, + unicode); // Sample some characters from the middle of the string. 
if (sample->hasLatin1Chars()) { @@ -1746,18 +1832,18 @@ irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData* } } if (is_ascii) { - node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case); + node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case, unicode); // Do it again to propagate the new nodes to places where they were not // put because they had not been calculated yet. if (node != nullptr) { - node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case); + node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case, unicode); } } if (node == nullptr) node = alloc.newInfallible(&alloc, EndNode::BACKTRACK); - Analysis analysis(cx, ignore_case, is_ascii); + Analysis analysis(cx, ignore_case, is_ascii, unicode); analysis.EnsureAnalyzed(node); if (analysis.has_failed()) { JS_ReportError(cx, analysis.errorMessage()); @@ -3597,7 +3683,7 @@ EmitAtomNonLetter(RegExpCompiler* compiler, RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); bool ascii = compiler->ascii(); char16_t chars[kEcma262UnCanonicalizeMaxWidth]; - int length = GetCaseIndependentLetters(c, ascii, chars); + int length = GetCaseIndependentLetters(c, ascii, compiler->unicode(), chars); if (length < 1) { // This can't match. Must be an ASCII subject and a non-ASCII character. // We do not need to do anything since the ASCII pass already handled this. @@ -3673,7 +3759,7 @@ EmitAtomLetter(RegExpCompiler* compiler, RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); bool ascii = compiler->ascii(); char16_t chars[kEcma262UnCanonicalizeMaxWidth]; - int length = GetCaseIndependentLetters(c, ascii, chars); + int length = GetCaseIndependentLetters(c, ascii, compiler->unicode(), chars); if (length <= 1) return false; // We may not need to check against the end of the input string // if this character lies before a character that matched. 
@@ -4538,7 +4624,8 @@ BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) MOZ_ASSERT(start_reg_ + 1 == end_reg_); if (compiler->ignore_case()) { assembler->CheckNotBackReferenceIgnoreCase(start_reg_, - trace->backtrack()); + trace->backtrack(), + compiler->unicode()); } else { assembler->CheckNotBackReference(start_reg_, trace->backtrack()); } @@ -4684,6 +4771,7 @@ TextNode::FillInBMInfo(int initial_offset, char16_t chars[kEcma262UnCanonicalizeMaxWidth]; int length = GetCaseIndependentLetters(character, bm->max_char() == kMaxOneByteCharCode, + bm->compiler()->unicode(), chars); for (int j = 0; j < length; j++) bm->Set(offset, chars[j]); @@ -4775,7 +4863,8 @@ TextNode::GetQuickCheckDetails(QuickCheckDetails* details, } if (compiler->ignore_case()) { char16_t chars[kEcma262UnCanonicalizeMaxWidth]; - size_t length = GetCaseIndependentLetters(c, compiler->ascii(), chars); + size_t length = GetCaseIndependentLetters(c, compiler->ascii(), + compiler->unicode(), chars); MOZ_ASSERT(length != 0); // Can only happen if c > char_mask (see above). if (length == 1) { // This letter has no case equivalents, so it's nice and simple diff --git a/js/src/irregexp/RegExpEngine.h b/js/src/irregexp/RegExpEngine.h index 8b8821eaa..356366f61 100644 --- a/js/src/irregexp/RegExpEngine.h +++ b/js/src/irregexp/RegExpEngine.h @@ -88,7 +88,8 @@ struct RegExpCode RegExpCode CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData* data, HandleLinearString sample, bool is_global, bool ignore_case, - bool is_ascii, bool match_only, bool force_bytecode, bool sticky); + bool is_ascii, bool match_only, bool force_bytecode, bool sticky, + bool unicode); // Note: this may return RegExpRunStatus_Error if an interrupt was requested // while the code was executing. 
@@ -145,7 +146,7 @@ class CharacterRange static void AddClassEscape(LifoAlloc* alloc, char16_t type, CharacterRangeVector* ranges); static void AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type, - CharacterRangeVector* ranges); + CharacterRangeVector* ranges, bool ignoreCase); static inline CharacterRange Singleton(char16_t value) { return CharacterRange(value, value); @@ -165,7 +166,7 @@ class CharacterRange bool is_valid() { return from_ <= to_; } bool IsEverything(char16_t max) { return from_ == 0 && to_ >= max; } bool IsSingleton() { return (from_ == to_); } - void AddCaseEquivalents(bool is_ascii, CharacterRangeVector* ranges); + void AddCaseEquivalents(bool is_ascii, bool unicode, CharacterRangeVector* ranges); static void Split(const LifoAlloc* alloc, CharacterRangeVector base, @@ -518,7 +519,7 @@ class RegExpNode // If we know that the input is ASCII then there are some nodes that can // never match. This method returns a node that can be substituted for // itself, or nullptr if the node can never match. - virtual RegExpNode* FilterASCII(int depth, bool ignore_case) { return this; } + virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode) { return this; } // Helper for FilterASCII. 
RegExpNode* replacement() { @@ -625,14 +626,14 @@ class SeqRegExpNode : public RegExpNode RegExpNode* on_success() { return on_success_; } void set_on_success(RegExpNode* node) { on_success_ = node; } - virtual RegExpNode* FilterASCII(int depth, bool ignore_case); + virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode); virtual bool FillInBMInfo(int offset, int budget, BoyerMooreLookahead* bm, bool not_at_start); protected: - RegExpNode* FilterSuccessor(int depth, bool ignore_case); + RegExpNode* FilterSuccessor(int depth, bool ignore_case, bool unicode); private: RegExpNode* on_success_; @@ -750,7 +751,7 @@ class TextNode : public SeqRegExpNode int characters_filled_in, bool not_at_start); TextElementVector& elements() { return *elements_; } - void MakeCaseIndependent(bool is_ascii); + void MakeCaseIndependent(bool is_ascii, bool unicode); virtual int GreedyLoopTextLength(); virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( RegExpCompiler* compiler); @@ -759,7 +760,7 @@ class TextNode : public SeqRegExpNode BoyerMooreLookahead* bm, bool not_at_start); void CalculateOffsets(); - virtual RegExpNode* FilterASCII(int depth, bool ignore_case); + virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode); private: enum TextEmitPassType { @@ -1013,7 +1014,7 @@ class ChoiceNode : public RegExpNode void set_not_at_start() { not_at_start_ = true; } void set_being_calculated(bool b) { being_calculated_ = b; } virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } - virtual RegExpNode* FilterASCII(int depth, bool ignore_case); + virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode); protected: int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); @@ -1066,7 +1067,7 @@ class NegativeLookaheadChoiceNode : public ChoiceNode // characters, but on a negative lookahead the negative branch did not take // part in that calculation (EatsAtLeast) so the assumptions don't hold. 
virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } - virtual RegExpNode* FilterASCII(int depth, bool ignore_case); + virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode); }; class LoopChoiceNode : public ChoiceNode @@ -1095,7 +1096,7 @@ class LoopChoiceNode : public ChoiceNode RegExpNode* continue_node() { return continue_node_; } bool body_can_be_zero_length() { return body_can_be_zero_length_; } virtual void Accept(NodeVisitor* visitor); - virtual RegExpNode* FilterASCII(int depth, bool ignore_case); + virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode); private: // AddAlternative is made private for loop nodes because alternatives @@ -1466,10 +1467,11 @@ class NodeVisitor class Analysis : public NodeVisitor { public: - Analysis(JSContext* cx, bool ignore_case, bool is_ascii) + Analysis(JSContext* cx, bool ignore_case, bool is_ascii, bool unicode) : cx(cx), ignore_case_(ignore_case), is_ascii_(is_ascii), + unicode_(unicode), error_message_(nullptr) {} @@ -1494,6 +1496,7 @@ class Analysis : public NodeVisitor JSContext* cx; bool ignore_case_; bool is_ascii_; + bool unicode_; const char* error_message_; Analysis(Analysis&) = delete; diff --git a/js/src/irregexp/RegExpInterpreter.cpp b/js/src/irregexp/RegExpInterpreter.cpp index 2a4bfb20a..1258c203d 100644 --- a/js/src/irregexp/RegExpInterpreter.cpp +++ b/js/src/irregexp/RegExpInterpreter.cpp @@ -442,6 +442,27 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha } break; } + BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) { + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from < 0 || len <= 0) { + pc += BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_LENGTH; + break; + } + if (current + len > length) { + pc = byteCode + Load32Aligned(pc + 4); + break; + } + if (CaseInsensitiveCompareUCStrings(chars + from, chars + current, + len * sizeof(CharT))) + { + current += 
len; + pc += BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_LENGTH; + } else { + pc = byteCode + Load32Aligned(pc + 4); + } + break; + } BYTECODE(CHECK_AT_START) if (current == 0) pc = byteCode + Load32Aligned(pc + 4); diff --git a/js/src/irregexp/RegExpMacroAssembler.cpp b/js/src/irregexp/RegExpMacroAssembler.cpp index 197c3f3b4..d66d0d204 100644 --- a/js/src/irregexp/RegExpMacroAssembler.cpp +++ b/js/src/irregexp/RegExpMacroAssembler.cpp @@ -65,6 +65,38 @@ template int irregexp::CaseInsensitiveCompareStrings(const char16_t* substring1, const char16_t* substring2, size_t byteLength); +template +int +irregexp::CaseInsensitiveCompareUCStrings(const CharT* substring1, const CharT* substring2, + size_t byteLength) +{ + MOZ_ASSERT(byteLength % sizeof(CharT) == 0); + size_t length = byteLength / sizeof(CharT); + + for (size_t i = 0; i < length; i++) { + char16_t c1 = substring1[i]; + char16_t c2 = substring2[i]; + if (c1 != c2) { + c1 = unicode::FoldCase(c1); + c2 = unicode::FoldCase(c2); + if (c1 != c2) + return 0; + } + } + + return 1; +} + +template int +irregexp::CaseInsensitiveCompareUCStrings(const Latin1Char* substring1, + const Latin1Char* substring2, + size_t byteLength); + +template int +irregexp::CaseInsensitiveCompareUCStrings(const char16_t* substring1, + const char16_t* substring2, + size_t byteLength); + InterpretedRegExpMacroAssembler::InterpretedRegExpMacroAssembler(LifoAlloc* alloc, RegExpShared* shared, size_t numSavedRegisters) : RegExpMacroAssembler(*alloc, shared, numSavedRegisters), @@ -210,11 +242,16 @@ InterpretedRegExpMacroAssembler::CheckNotBackReference(int start_reg, jit::Label } void -InterpretedRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match) +InterpretedRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, + jit::Label* on_no_match, + bool unicode) { MOZ_ASSERT(start_reg >= 0); MOZ_ASSERT(start_reg <= kMaxRegister); - Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg); + if (unicode) + 
Emit(BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE, start_reg); + else + Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg); EmitOrLink(on_no_match); } diff --git a/js/src/irregexp/RegExpMacroAssembler.h b/js/src/irregexp/RegExpMacroAssembler.h index 0111e37e0..940033d31 100644 --- a/js/src/irregexp/RegExpMacroAssembler.h +++ b/js/src/irregexp/RegExpMacroAssembler.h @@ -112,7 +112,8 @@ class MOZ_STACK_CLASS RegExpMacroAssembler virtual void CheckGreedyLoop(jit::Label* on_tos_equals_current_position) = 0; virtual void CheckNotAtStart(jit::Label* on_not_at_start) = 0; virtual void CheckNotBackReference(int start_reg, jit::Label* on_no_match) = 0; - virtual void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match) = 0; + virtual void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, + bool unicode) = 0; // Check the current character for a match with a literal character. If we // fail to match then goto the on_failure label. End of input always @@ -221,6 +222,11 @@ template int CaseInsensitiveCompareStrings(const CharT* substring1, const CharT* substring2, size_t byteLength); +template +int +CaseInsensitiveCompareUCStrings(const CharT* substring1, const CharT* substring2, + size_t byteLength); + class MOZ_STACK_CLASS InterpretedRegExpMacroAssembler : public RegExpMacroAssembler { public: @@ -241,7 +247,7 @@ class MOZ_STACK_CLASS InterpretedRegExpMacroAssembler : public RegExpMacroAssemb void CheckGreedyLoop(jit::Label* on_tos_equals_current_position); void CheckNotAtStart(jit::Label* on_not_at_start); void CheckNotBackReference(int start_reg, jit::Label* on_no_match); - void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match); + void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, bool unicode); void CheckNotCharacter(unsigned c, jit::Label* on_not_equal); void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal); void CheckNotCharacterAfterMinusAnd(char16_t c, 
char16_t minus, char16_t and_with, diff --git a/js/src/irregexp/RegExpParser.cpp b/js/src/irregexp/RegExpParser.cpp index 976a87d8d..1d582052a 100644 --- a/js/src/irregexp/RegExpParser.cpp +++ b/js/src/irregexp/RegExpParser.cpp @@ -206,7 +206,7 @@ RegExpBuilder::AddQuantifierToAtom(int min, int max, template RegExpParser::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc, const CharT* chars, const CharT* end, bool multiline_mode, - bool unicode) + bool unicode, bool ignore_case) : ts(ts), alloc(alloc), captures_(nullptr), @@ -217,6 +217,7 @@ RegExpParser::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc, has_more_(true), multiline_(multiline_mode), unicode_(unicode), + ignore_case_(ignore_case), simple_(false), contains_anchor_(false), is_scanned_for_captures_(false) @@ -609,10 +610,11 @@ AddCharOrEscapeUnicode(LifoAlloc* alloc, CharacterRangeVector* trail_ranges, WideCharRangeVector* wide_ranges, char16_t char_class, - widechar c) + widechar c, + bool ignore_case) { if (char_class != kNoCharClass) { - CharacterRange::AddClassEscapeUnicode(alloc, char_class, ranges); + CharacterRange::AddClassEscapeUnicode(alloc, char_class, ranges, ignore_case); switch (char_class) { case 'S': case 'W': @@ -896,7 +898,7 @@ RegExpParser::ParseCharacterClass() } else if (current() == ']') { if (unicode_) { AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, - char_class, first); + char_class, first, ignore_case_); } else { AddCharOrEscape(alloc, ranges, char_class, first); } @@ -926,7 +928,7 @@ RegExpParser::ParseCharacterClass() } else { if (unicode_) { AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, - char_class, first); + char_class, first, ignore_case_); } else { AddCharOrEscape(alloc, ranges, char_class, first); } @@ -1228,13 +1230,14 @@ UnicodeEverythingAtom(LifoAlloc* alloc) } RegExpTree* -UnicodeCharacterClassEscapeAtom(LifoAlloc* alloc, char16_t char_class) +UnicodeCharacterClassEscapeAtom(LifoAlloc* 
alloc, char16_t char_class, bool ignore_case) { CharacterRangeVector* ranges = alloc->newInfallible(*alloc); CharacterRangeVector* lead_ranges = alloc->newInfallible(*alloc); CharacterRangeVector* trail_ranges = alloc->newInfallible(*alloc); WideCharRangeVector* wide_ranges = alloc->newInfallible(*alloc); - AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, char_class, 0); + AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, char_class, 0, + ignore_case); return UnicodeRangesAtom(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, false); } @@ -1406,7 +1409,8 @@ RegExpParser::ParseDisjunction() case 'D': case 'S': case 'W': if (unicode_) { Advance(); - builder->AddAtom(UnicodeCharacterClassEscapeAtom(alloc, current())); + builder->AddAtom(UnicodeCharacterClassEscapeAtom(alloc, current(), + ignore_case_)); Advance(); break; } @@ -1416,7 +1420,10 @@ RegExpParser::ParseDisjunction() Advance(2); CharacterRangeVector* ranges = alloc->newInfallible(*alloc); - CharacterRange::AddClassEscape(alloc, c, ranges); + if (unicode_) + CharacterRange::AddClassEscapeUnicode(alloc, c, ranges, ignore_case_); + else + CharacterRange::AddClassEscape(alloc, c, ranges); RegExpTree* atom = alloc->newInfallible(ranges, false); builder->AddAtom(atom); break; @@ -1628,7 +1635,8 @@ template class irregexp::RegExpParser; template static bool ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length, - bool multiline, bool match_only, bool unicode, RegExpCompileData* data) + bool multiline, bool match_only, bool unicode, bool ignore_case, + RegExpCompileData* data) { if (match_only) { // Try to strip a leading '.*' from the RegExp, but only if it is not @@ -1651,7 +1659,7 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si } } - RegExpParser parser(ts, &alloc, chars, chars + length, multiline, unicode); + RegExpParser parser(ts, &alloc, chars, chars + length, multiline, 
unicode, ignore_case); data->tree = parser.ParsePattern(); if (!data->tree) return false; @@ -1664,15 +1672,15 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si bool irregexp::ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str, - bool multiline, bool match_only, bool unicode, + bool multiline, bool match_only, bool unicode, bool ignore_case, RegExpCompileData* data) { JS::AutoCheckCannotGC nogc; return str->hasLatin1Chars() ? ::ParsePattern(ts, alloc, str->latin1Chars(nogc), str->length(), - multiline, match_only, unicode, data) + multiline, match_only, unicode, ignore_case, data) : ::ParsePattern(ts, alloc, str->twoByteChars(nogc), str->length(), - multiline, match_only, unicode, data); + multiline, match_only, unicode, ignore_case, data); } template @@ -1682,7 +1690,7 @@ ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* cha { LifoAllocScope scope(&alloc); - RegExpParser parser(ts, &alloc, chars, chars + length, false, unicode); + RegExpParser parser(ts, &alloc, chars, chars + length, false, unicode, false); return parser.ParsePattern() != nullptr; } diff --git a/js/src/irregexp/RegExpParser.h b/js/src/irregexp/RegExpParser.h index 8f7cf6304..34d0620d6 100644 --- a/js/src/irregexp/RegExpParser.h +++ b/js/src/irregexp/RegExpParser.h @@ -43,7 +43,7 @@ namespace irregexp { bool ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str, - bool multiline, bool match_only, bool unicode, + bool multiline, bool match_only, bool unicode, bool ignore_case, RegExpCompileData* data); bool @@ -175,7 +175,8 @@ class RegExpParser { public: RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc, - const CharT* chars, const CharT* end, bool multiline_mode, bool unicode); + const CharT* chars, const CharT* end, bool multiline_mode, bool unicode, + bool ignore_case); RegExpTree* ParsePattern(); RegExpTree* ParseDisjunction(); @@ -296,6 +297,7 @@ class RegExpParser bool has_more_; bool 
multiline_; bool unicode_; + bool ignore_case_; bool simple_; bool contains_anchor_; bool is_scanned_for_captures_; diff --git a/js/src/tests/ecma_6/RegExp/unicode-ignoreCase-ascii.js b/js/src/tests/ecma_6/RegExp/unicode-ignoreCase-ascii.js new file mode 100644 index 000000000..6d453290e --- /dev/null +++ b/js/src/tests/ecma_6/RegExp/unicode-ignoreCase-ascii.js @@ -0,0 +1,45 @@ +var BUGNUMBER = 1135377; +var summary = "Implement RegExp unicode flag -- ignoreCase flag with non-ascii to ascii map."; + +print(BUGNUMBER + ": " + summary); + +// LATIN CAPITAL LETTER Y WITH DIAERESIS +assertEqArray(/\u0178/iu.exec("\u00FF"), + ["\u00FF"]); +assertEqArray(/\u00FF/iu.exec("\u0178"), + ["\u0178"]); + +// LATIN SMALL LETTER LONG S +assertEqArray(/\u017F/iu.exec("S"), + ["S"]); +assertEqArray(/\u017F/iu.exec("s"), + ["s"]); +assertEqArray(/S/iu.exec("\u017F"), + ["\u017F"]); +assertEqArray(/s/iu.exec("\u017F"), + ["\u017F"]); + +// LATIN CAPITAL LETTER SHARP S +assertEqArray(/\u1E9E/iu.exec("\u00DF"), + ["\u00DF"]); +assertEqArray(/\u00DF/iu.exec("\u1E9E"), + ["\u1E9E"]); + +// KELVIN SIGN +assertEqArray(/\u212A/iu.exec("K"), + ["K"]); +assertEqArray(/\u212A/iu.exec("k"), + ["k"]); +assertEqArray(/K/iu.exec("\u212A"), + ["\u212A"]); +assertEqArray(/k/iu.exec("\u212A"), + ["\u212A"]); + +// ANGSTROM SIGN +assertEqArray(/\u212B/iu.exec("\u00E5"), + ["\u00E5"]); +assertEqArray(/\u00E5/iu.exec("\u212B"), + ["\u212B"]); + +if (typeof reportCompare === "function") + reportCompare(true, true); diff --git a/js/src/tests/ecma_6/RegExp/unicode-ignoreCase-escape.js b/js/src/tests/ecma_6/RegExp/unicode-ignoreCase-escape.js new file mode 100644 index 000000000..af5981be0 --- /dev/null +++ b/js/src/tests/ecma_6/RegExp/unicode-ignoreCase-escape.js @@ -0,0 +1,39 @@ +var BUGNUMBER = 1135377; +var summary = "Implement RegExp unicode flag -- ignoreCase flag with character class escape."; + +print(BUGNUMBER + ": " + summary); + +// LATIN SMALL LETTER LONG S + +assertEqArray(/\w/iu.exec("S"), + 
["S"]); +assertEqArray(/\w/iu.exec("s"), + ["s"]); +assertEqArray(/\w/iu.exec("\u017F"), + ["\u017F"]); + +assertEqArray(/\W/iu.exec("S"), + ["S"]); +assertEqArray(/\W/iu.exec("s"), + ["s"]); +assertEqArray(/\W/iu.exec("\u017F"), + ["\u017F"]); + +// KELVIN SIGN + +assertEqArray(/\w/iu.exec("K"), + ["K"]); +assertEqArray(/\w/iu.exec("k"), + ["k"]); +assertEqArray(/\w/iu.exec("\u212A"), + ["\u212A"]); + +assertEqArray(/\W/iu.exec("K"), + ["K"]); +assertEqArray(/\W/iu.exec("k"), + ["k"]); +assertEqArray(/\W/iu.exec("\u212A"), + ["\u212A"]); + +if (typeof reportCompare === "function") + reportCompare(true, true); diff --git a/js/src/tests/ecma_6/RegExp/unicode-ignoreCase-negated.js b/js/src/tests/ecma_6/RegExp/unicode-ignoreCase-negated.js new file mode 100644 index 000000000..30909a515 --- /dev/null +++ b/js/src/tests/ecma_6/RegExp/unicode-ignoreCase-negated.js @@ -0,0 +1,19 @@ +var BUGNUMBER = 1135377; +var summary = "Implement RegExp unicode flag -- ignoreCase flag with negated character class."; + +print(BUGNUMBER + ": " + summary); + +assertEq(/[^A]/iu.exec("A"), + null); +assertEq(/[^a]/iu.exec("A"), + null); +assertEq(/[^A]/iu.exec("a"), + null); +assertEq(/[^a]/iu.exec("a"), + null); + +assertEqArray(/[^A]/iu.exec("b"), + ["b"]); + +if (typeof reportCompare === "function") + reportCompare(true, true); diff --git a/js/src/tests/ecma_6/RegExp/unicode-ignoreCase.js b/js/src/tests/ecma_6/RegExp/unicode-ignoreCase.js new file mode 100644 index 000000000..1eb8a0889 --- /dev/null +++ b/js/src/tests/ecma_6/RegExp/unicode-ignoreCase.js @@ -0,0 +1,2245 @@ +/* Generated by make_unicode.py DO NOT MODIFY */ + +/* + * Any copyright is dedicated to the Public Domain. 
+ * http://creativecommons.org/licenses/publicdomain/ + */ + +var BUGNUMBER = 1135377; +var summary = "Implement RegExp unicode flag -- ignoreCase flag."; + +print(BUGNUMBER + ": " + summary); + +function test(code, ...equivs) { + var codeRe = new RegExp(String.fromCodePoint(code) + "+", "iu"); + var ans = String.fromCodePoint(code) + equivs.map(c => String.fromCodePoint(c)).join(""); + assertEqArray(codeRe.exec("<" + ans + ">"), [ans]); + codeRe = new RegExp("[" + String.fromCodePoint(code) + "]+", "iu"); + assertEqArray(codeRe.exec("<" + ans + ">"), [ans]); +} +test(0x41,0x61); +test(0x42,0x62); +test(0x43,0x63); +test(0x44,0x64); +test(0x45,0x65); +test(0x46,0x66); +test(0x47,0x67); +test(0x48,0x68); +test(0x49,0x69); +test(0x4a,0x6a); +test(0x4b,0x6b,0x212a); +test(0x4c,0x6c); +test(0x4d,0x6d); +test(0x4e,0x6e); +test(0x4f,0x6f); +test(0x50,0x70); +test(0x51,0x71); +test(0x52,0x72); +test(0x53,0x73,0x17f); +test(0x54,0x74); +test(0x55,0x75); +test(0x56,0x76); +test(0x57,0x77); +test(0x58,0x78); +test(0x59,0x79); +test(0x5a,0x7a); +test(0x61,0x41); +test(0x62,0x42); +test(0x63,0x43); +test(0x64,0x44); +test(0x65,0x45); +test(0x66,0x46); +test(0x67,0x47); +test(0x68,0x48); +test(0x69,0x49); +test(0x6a,0x4a); +test(0x6b,0x4b,0x212a); +test(0x6c,0x4c); +test(0x6d,0x4d); +test(0x6e,0x4e); +test(0x6f,0x4f); +test(0x70,0x50); +test(0x71,0x51); +test(0x72,0x52); +test(0x73,0x53,0x17f); +test(0x74,0x54); +test(0x75,0x55); +test(0x76,0x56); +test(0x77,0x57); +test(0x78,0x58); +test(0x79,0x59); +test(0x7a,0x5a); +test(0xb5,0x3bc,0x39c); +test(0xc0,0xe0); +test(0xc1,0xe1); +test(0xc2,0xe2); +test(0xc3,0xe3); +test(0xc4,0xe4); +test(0xc5,0xe5,0x212b); +test(0xc6,0xe6); +test(0xc7,0xe7); +test(0xc8,0xe8); +test(0xc9,0xe9); +test(0xca,0xea); +test(0xcb,0xeb); +test(0xcc,0xec); +test(0xcd,0xed); +test(0xce,0xee); +test(0xcf,0xef); +test(0xd0,0xf0); +test(0xd1,0xf1); +test(0xd2,0xf2); +test(0xd3,0xf3); +test(0xd4,0xf4); +test(0xd5,0xf5); +test(0xd6,0xf6); +test(0xd8,0xf8); 
+test(0xd9,0xf9); +test(0xda,0xfa); +test(0xdb,0xfb); +test(0xdc,0xfc); +test(0xdd,0xfd); +test(0xde,0xfe); +test(0xdf,0x1e9e); +test(0xe0,0xc0); +test(0xe1,0xc1); +test(0xe2,0xc2); +test(0xe3,0xc3); +test(0xe4,0xc4); +test(0xe5,0xc5,0x212b); +test(0xe6,0xc6); +test(0xe7,0xc7); +test(0xe8,0xc8); +test(0xe9,0xc9); +test(0xea,0xca); +test(0xeb,0xcb); +test(0xec,0xcc); +test(0xed,0xcd); +test(0xee,0xce); +test(0xef,0xcf); +test(0xf0,0xd0); +test(0xf1,0xd1); +test(0xf2,0xd2); +test(0xf3,0xd3); +test(0xf4,0xd4); +test(0xf5,0xd5); +test(0xf6,0xd6); +test(0xf8,0xd8); +test(0xf9,0xd9); +test(0xfa,0xda); +test(0xfb,0xdb); +test(0xfc,0xdc); +test(0xfd,0xdd); +test(0xfe,0xde); +test(0xff,0x178); +test(0x100,0x101); +test(0x101,0x100); +test(0x102,0x103); +test(0x103,0x102); +test(0x104,0x105); +test(0x105,0x104); +test(0x106,0x107); +test(0x107,0x106); +test(0x108,0x109); +test(0x109,0x108); +test(0x10a,0x10b); +test(0x10b,0x10a); +test(0x10c,0x10d); +test(0x10d,0x10c); +test(0x10e,0x10f); +test(0x10f,0x10e); +test(0x110,0x111); +test(0x111,0x110); +test(0x112,0x113); +test(0x113,0x112); +test(0x114,0x115); +test(0x115,0x114); +test(0x116,0x117); +test(0x117,0x116); +test(0x118,0x119); +test(0x119,0x118); +test(0x11a,0x11b); +test(0x11b,0x11a); +test(0x11c,0x11d); +test(0x11d,0x11c); +test(0x11e,0x11f); +test(0x11f,0x11e); +test(0x120,0x121); +test(0x121,0x120); +test(0x122,0x123); +test(0x123,0x122); +test(0x124,0x125); +test(0x125,0x124); +test(0x126,0x127); +test(0x127,0x126); +test(0x128,0x129); +test(0x129,0x128); +test(0x12a,0x12b); +test(0x12b,0x12a); +test(0x12c,0x12d); +test(0x12d,0x12c); +test(0x12e,0x12f); +test(0x12f,0x12e); +test(0x132,0x133); +test(0x133,0x132); +test(0x134,0x135); +test(0x135,0x134); +test(0x136,0x137); +test(0x137,0x136); +test(0x139,0x13a); +test(0x13a,0x139); +test(0x13b,0x13c); +test(0x13c,0x13b); +test(0x13d,0x13e); +test(0x13e,0x13d); +test(0x13f,0x140); +test(0x140,0x13f); +test(0x141,0x142); +test(0x142,0x141); +test(0x143,0x144); 
+test(0x144,0x143); +test(0x145,0x146); +test(0x146,0x145); +test(0x147,0x148); +test(0x148,0x147); +test(0x14a,0x14b); +test(0x14b,0x14a); +test(0x14c,0x14d); +test(0x14d,0x14c); +test(0x14e,0x14f); +test(0x14f,0x14e); +test(0x150,0x151); +test(0x151,0x150); +test(0x152,0x153); +test(0x153,0x152); +test(0x154,0x155); +test(0x155,0x154); +test(0x156,0x157); +test(0x157,0x156); +test(0x158,0x159); +test(0x159,0x158); +test(0x15a,0x15b); +test(0x15b,0x15a); +test(0x15c,0x15d); +test(0x15d,0x15c); +test(0x15e,0x15f); +test(0x15f,0x15e); +test(0x160,0x161); +test(0x161,0x160); +test(0x162,0x163); +test(0x163,0x162); +test(0x164,0x165); +test(0x165,0x164); +test(0x166,0x167); +test(0x167,0x166); +test(0x168,0x169); +test(0x169,0x168); +test(0x16a,0x16b); +test(0x16b,0x16a); +test(0x16c,0x16d); +test(0x16d,0x16c); +test(0x16e,0x16f); +test(0x16f,0x16e); +test(0x170,0x171); +test(0x171,0x170); +test(0x172,0x173); +test(0x173,0x172); +test(0x174,0x175); +test(0x175,0x174); +test(0x176,0x177); +test(0x177,0x176); +test(0x178,0xff); +test(0x179,0x17a); +test(0x17a,0x179); +test(0x17b,0x17c); +test(0x17c,0x17b); +test(0x17d,0x17e); +test(0x17e,0x17d); +test(0x17f,0x73,0x53); +test(0x180,0x243); +test(0x181,0x253); +test(0x182,0x183); +test(0x183,0x182); +test(0x184,0x185); +test(0x185,0x184); +test(0x186,0x254); +test(0x187,0x188); +test(0x188,0x187); +test(0x189,0x256); +test(0x18a,0x257); +test(0x18b,0x18c); +test(0x18c,0x18b); +test(0x18e,0x1dd); +test(0x18f,0x259); +test(0x190,0x25b); +test(0x191,0x192); +test(0x192,0x191); +test(0x193,0x260); +test(0x194,0x263); +test(0x195,0x1f6); +test(0x196,0x269); +test(0x197,0x268); +test(0x198,0x199); +test(0x199,0x198); +test(0x19a,0x23d); +test(0x19c,0x26f); +test(0x19d,0x272); +test(0x19e,0x220); +test(0x19f,0x275); +test(0x1a0,0x1a1); +test(0x1a1,0x1a0); +test(0x1a2,0x1a3); +test(0x1a3,0x1a2); +test(0x1a4,0x1a5); +test(0x1a5,0x1a4); +test(0x1a6,0x280); +test(0x1a7,0x1a8); +test(0x1a8,0x1a7); +test(0x1a9,0x283); 
+test(0x1ac,0x1ad); +test(0x1ad,0x1ac); +test(0x1ae,0x288); +test(0x1af,0x1b0); +test(0x1b0,0x1af); +test(0x1b1,0x28a); +test(0x1b2,0x28b); +test(0x1b3,0x1b4); +test(0x1b4,0x1b3); +test(0x1b5,0x1b6); +test(0x1b6,0x1b5); +test(0x1b7,0x292); +test(0x1b8,0x1b9); +test(0x1b9,0x1b8); +test(0x1bc,0x1bd); +test(0x1bd,0x1bc); +test(0x1bf,0x1f7); +test(0x1c4,0x1c6,0x1c5); +test(0x1c5,0x1c6,0x1c4); +test(0x1c6,0x1c4,0x1c5); +test(0x1c7,0x1c9,0x1c8); +test(0x1c8,0x1c9,0x1c7); +test(0x1c9,0x1c7,0x1c8); +test(0x1ca,0x1cc,0x1cb); +test(0x1cb,0x1cc,0x1ca); +test(0x1cc,0x1ca,0x1cb); +test(0x1cd,0x1ce); +test(0x1ce,0x1cd); +test(0x1cf,0x1d0); +test(0x1d0,0x1cf); +test(0x1d1,0x1d2); +test(0x1d2,0x1d1); +test(0x1d3,0x1d4); +test(0x1d4,0x1d3); +test(0x1d5,0x1d6); +test(0x1d6,0x1d5); +test(0x1d7,0x1d8); +test(0x1d8,0x1d7); +test(0x1d9,0x1da); +test(0x1da,0x1d9); +test(0x1db,0x1dc); +test(0x1dc,0x1db); +test(0x1dd,0x18e); +test(0x1de,0x1df); +test(0x1df,0x1de); +test(0x1e0,0x1e1); +test(0x1e1,0x1e0); +test(0x1e2,0x1e3); +test(0x1e3,0x1e2); +test(0x1e4,0x1e5); +test(0x1e5,0x1e4); +test(0x1e6,0x1e7); +test(0x1e7,0x1e6); +test(0x1e8,0x1e9); +test(0x1e9,0x1e8); +test(0x1ea,0x1eb); +test(0x1eb,0x1ea); +test(0x1ec,0x1ed); +test(0x1ed,0x1ec); +test(0x1ee,0x1ef); +test(0x1ef,0x1ee); +test(0x1f1,0x1f3,0x1f2); +test(0x1f2,0x1f3,0x1f1); +test(0x1f3,0x1f1,0x1f2); +test(0x1f4,0x1f5); +test(0x1f5,0x1f4); +test(0x1f6,0x195); +test(0x1f7,0x1bf); +test(0x1f8,0x1f9); +test(0x1f9,0x1f8); +test(0x1fa,0x1fb); +test(0x1fb,0x1fa); +test(0x1fc,0x1fd); +test(0x1fd,0x1fc); +test(0x1fe,0x1ff); +test(0x1ff,0x1fe); +test(0x200,0x201); +test(0x201,0x200); +test(0x202,0x203); +test(0x203,0x202); +test(0x204,0x205); +test(0x205,0x204); +test(0x206,0x207); +test(0x207,0x206); +test(0x208,0x209); +test(0x209,0x208); +test(0x20a,0x20b); +test(0x20b,0x20a); +test(0x20c,0x20d); +test(0x20d,0x20c); +test(0x20e,0x20f); +test(0x20f,0x20e); +test(0x210,0x211); +test(0x211,0x210); +test(0x212,0x213); +test(0x213,0x212); 
+test(0x214,0x215); +test(0x215,0x214); +test(0x216,0x217); +test(0x217,0x216); +test(0x218,0x219); +test(0x219,0x218); +test(0x21a,0x21b); +test(0x21b,0x21a); +test(0x21c,0x21d); +test(0x21d,0x21c); +test(0x21e,0x21f); +test(0x21f,0x21e); +test(0x220,0x19e); +test(0x222,0x223); +test(0x223,0x222); +test(0x224,0x225); +test(0x225,0x224); +test(0x226,0x227); +test(0x227,0x226); +test(0x228,0x229); +test(0x229,0x228); +test(0x22a,0x22b); +test(0x22b,0x22a); +test(0x22c,0x22d); +test(0x22d,0x22c); +test(0x22e,0x22f); +test(0x22f,0x22e); +test(0x230,0x231); +test(0x231,0x230); +test(0x232,0x233); +test(0x233,0x232); +test(0x23a,0x2c65); +test(0x23b,0x23c); +test(0x23c,0x23b); +test(0x23d,0x19a); +test(0x23e,0x2c66); +test(0x23f,0x2c7e); +test(0x240,0x2c7f); +test(0x241,0x242); +test(0x242,0x241); +test(0x243,0x180); +test(0x244,0x289); +test(0x245,0x28c); +test(0x246,0x247); +test(0x247,0x246); +test(0x248,0x249); +test(0x249,0x248); +test(0x24a,0x24b); +test(0x24b,0x24a); +test(0x24c,0x24d); +test(0x24d,0x24c); +test(0x24e,0x24f); +test(0x24f,0x24e); +test(0x250,0x2c6f); +test(0x251,0x2c6d); +test(0x252,0x2c70); +test(0x253,0x181); +test(0x254,0x186); +test(0x256,0x189); +test(0x257,0x18a); +test(0x259,0x18f); +test(0x25b,0x190); +test(0x25c,0xa7ab); +test(0x260,0x193); +test(0x261,0xa7ac); +test(0x263,0x194); +test(0x265,0xa78d); +test(0x266,0xa7aa); +test(0x268,0x197); +test(0x269,0x196); +test(0x26b,0x2c62); +test(0x26c,0xa7ad); +test(0x26f,0x19c); +test(0x271,0x2c6e); +test(0x272,0x19d); +test(0x275,0x19f); +test(0x27d,0x2c64); +test(0x280,0x1a6); +test(0x283,0x1a9); +test(0x287,0xa7b1); +test(0x288,0x1ae); +test(0x289,0x244); +test(0x28a,0x1b1); +test(0x28b,0x1b2); +test(0x28c,0x245); +test(0x292,0x1b7); +test(0x29d,0xa7b2); +test(0x29e,0xa7b0); +test(0x345,0x3b9,0x399,0x1fbe); +test(0x370,0x371); +test(0x371,0x370); +test(0x372,0x373); +test(0x373,0x372); +test(0x376,0x377); +test(0x377,0x376); +test(0x37b,0x3fd); +test(0x37c,0x3fe); +test(0x37d,0x3ff); 
+test(0x37f,0x3f3); +test(0x386,0x3ac); +test(0x388,0x3ad); +test(0x389,0x3ae); +test(0x38a,0x3af); +test(0x38c,0x3cc); +test(0x38e,0x3cd); +test(0x38f,0x3ce); +test(0x391,0x3b1); +test(0x392,0x3b2,0x3d0); +test(0x393,0x3b3); +test(0x394,0x3b4); +test(0x395,0x3b5,0x3f5); +test(0x396,0x3b6); +test(0x397,0x3b7); +test(0x398,0x3b8,0x3d1,0x3f4); +test(0x399,0x3b9,0x345,0x1fbe); +test(0x39a,0x3ba,0x3f0); +test(0x39b,0x3bb); +test(0x39c,0x3bc,0xb5); +test(0x39d,0x3bd); +test(0x39e,0x3be); +test(0x39f,0x3bf); +test(0x3a0,0x3c0,0x3d6); +test(0x3a1,0x3c1,0x3f1); +test(0x3a3,0x3c3,0x3c2); +test(0x3a4,0x3c4); +test(0x3a5,0x3c5); +test(0x3a6,0x3c6,0x3d5); +test(0x3a7,0x3c7); +test(0x3a8,0x3c8); +test(0x3a9,0x3c9,0x2126); +test(0x3aa,0x3ca); +test(0x3ab,0x3cb); +test(0x3ac,0x386); +test(0x3ad,0x388); +test(0x3ae,0x389); +test(0x3af,0x38a); +test(0x3b1,0x391); +test(0x3b2,0x392,0x3d0); +test(0x3b3,0x393); +test(0x3b4,0x394); +test(0x3b5,0x395,0x3f5); +test(0x3b6,0x396); +test(0x3b7,0x397); +test(0x3b8,0x398,0x3d1,0x3f4); +test(0x3b9,0x345,0x399,0x1fbe); +test(0x3ba,0x39a,0x3f0); +test(0x3bb,0x39b); +test(0x3bc,0xb5,0x39c); +test(0x3bd,0x39d); +test(0x3be,0x39e); +test(0x3bf,0x39f); +test(0x3c0,0x3a0,0x3d6); +test(0x3c1,0x3a1,0x3f1); +test(0x3c2,0x3c3,0x3a3); +test(0x3c3,0x3a3,0x3c2); +test(0x3c4,0x3a4); +test(0x3c5,0x3a5); +test(0x3c6,0x3a6,0x3d5); +test(0x3c7,0x3a7); +test(0x3c8,0x3a8); +test(0x3c9,0x3a9,0x2126); +test(0x3ca,0x3aa); +test(0x3cb,0x3ab); +test(0x3cc,0x38c); +test(0x3cd,0x38e); +test(0x3ce,0x38f); +test(0x3cf,0x3d7); +test(0x3d0,0x3b2,0x392); +test(0x3d1,0x3b8,0x398,0x3f4); +test(0x3d5,0x3c6,0x3a6); +test(0x3d6,0x3c0,0x3a0); +test(0x3d7,0x3cf); +test(0x3d8,0x3d9); +test(0x3d9,0x3d8); +test(0x3da,0x3db); +test(0x3db,0x3da); +test(0x3dc,0x3dd); +test(0x3dd,0x3dc); +test(0x3de,0x3df); +test(0x3df,0x3de); +test(0x3e0,0x3e1); +test(0x3e1,0x3e0); +test(0x3e2,0x3e3); +test(0x3e3,0x3e2); +test(0x3e4,0x3e5); +test(0x3e5,0x3e4); +test(0x3e6,0x3e7); +test(0x3e7,0x3e6); 
+test(0x3e8,0x3e9); +test(0x3e9,0x3e8); +test(0x3ea,0x3eb); +test(0x3eb,0x3ea); +test(0x3ec,0x3ed); +test(0x3ed,0x3ec); +test(0x3ee,0x3ef); +test(0x3ef,0x3ee); +test(0x3f0,0x3ba,0x39a); +test(0x3f1,0x3c1,0x3a1); +test(0x3f2,0x3f9); +test(0x3f3,0x37f); +test(0x3f4,0x3b8,0x398,0x3d1); +test(0x3f5,0x3b5,0x395); +test(0x3f7,0x3f8); +test(0x3f8,0x3f7); +test(0x3f9,0x3f2); +test(0x3fa,0x3fb); +test(0x3fb,0x3fa); +test(0x3fd,0x37b); +test(0x3fe,0x37c); +test(0x3ff,0x37d); +test(0x400,0x450); +test(0x401,0x451); +test(0x402,0x452); +test(0x403,0x453); +test(0x404,0x454); +test(0x405,0x455); +test(0x406,0x456); +test(0x407,0x457); +test(0x408,0x458); +test(0x409,0x459); +test(0x40a,0x45a); +test(0x40b,0x45b); +test(0x40c,0x45c); +test(0x40d,0x45d); +test(0x40e,0x45e); +test(0x40f,0x45f); +test(0x410,0x430); +test(0x411,0x431); +test(0x412,0x432); +test(0x413,0x433); +test(0x414,0x434); +test(0x415,0x435); +test(0x416,0x436); +test(0x417,0x437); +test(0x418,0x438); +test(0x419,0x439); +test(0x41a,0x43a); +test(0x41b,0x43b); +test(0x41c,0x43c); +test(0x41d,0x43d); +test(0x41e,0x43e); +test(0x41f,0x43f); +test(0x420,0x440); +test(0x421,0x441); +test(0x422,0x442); +test(0x423,0x443); +test(0x424,0x444); +test(0x425,0x445); +test(0x426,0x446); +test(0x427,0x447); +test(0x428,0x448); +test(0x429,0x449); +test(0x42a,0x44a); +test(0x42b,0x44b); +test(0x42c,0x44c); +test(0x42d,0x44d); +test(0x42e,0x44e); +test(0x42f,0x44f); +test(0x430,0x410); +test(0x431,0x411); +test(0x432,0x412); +test(0x433,0x413); +test(0x434,0x414); +test(0x435,0x415); +test(0x436,0x416); +test(0x437,0x417); +test(0x438,0x418); +test(0x439,0x419); +test(0x43a,0x41a); +test(0x43b,0x41b); +test(0x43c,0x41c); +test(0x43d,0x41d); +test(0x43e,0x41e); +test(0x43f,0x41f); +test(0x440,0x420); +test(0x441,0x421); +test(0x442,0x422); +test(0x443,0x423); +test(0x444,0x424); +test(0x445,0x425); +test(0x446,0x426); +test(0x447,0x427); +test(0x448,0x428); +test(0x449,0x429); +test(0x44a,0x42a); +test(0x44b,0x42b); 
+test(0x44c,0x42c); +test(0x44d,0x42d); +test(0x44e,0x42e); +test(0x44f,0x42f); +test(0x450,0x400); +test(0x451,0x401); +test(0x452,0x402); +test(0x453,0x403); +test(0x454,0x404); +test(0x455,0x405); +test(0x456,0x406); +test(0x457,0x407); +test(0x458,0x408); +test(0x459,0x409); +test(0x45a,0x40a); +test(0x45b,0x40b); +test(0x45c,0x40c); +test(0x45d,0x40d); +test(0x45e,0x40e); +test(0x45f,0x40f); +test(0x460,0x461); +test(0x461,0x460); +test(0x462,0x463); +test(0x463,0x462); +test(0x464,0x465); +test(0x465,0x464); +test(0x466,0x467); +test(0x467,0x466); +test(0x468,0x469); +test(0x469,0x468); +test(0x46a,0x46b); +test(0x46b,0x46a); +test(0x46c,0x46d); +test(0x46d,0x46c); +test(0x46e,0x46f); +test(0x46f,0x46e); +test(0x470,0x471); +test(0x471,0x470); +test(0x472,0x473); +test(0x473,0x472); +test(0x474,0x475); +test(0x475,0x474); +test(0x476,0x477); +test(0x477,0x476); +test(0x478,0x479); +test(0x479,0x478); +test(0x47a,0x47b); +test(0x47b,0x47a); +test(0x47c,0x47d); +test(0x47d,0x47c); +test(0x47e,0x47f); +test(0x47f,0x47e); +test(0x480,0x481); +test(0x481,0x480); +test(0x48a,0x48b); +test(0x48b,0x48a); +test(0x48c,0x48d); +test(0x48d,0x48c); +test(0x48e,0x48f); +test(0x48f,0x48e); +test(0x490,0x491); +test(0x491,0x490); +test(0x492,0x493); +test(0x493,0x492); +test(0x494,0x495); +test(0x495,0x494); +test(0x496,0x497); +test(0x497,0x496); +test(0x498,0x499); +test(0x499,0x498); +test(0x49a,0x49b); +test(0x49b,0x49a); +test(0x49c,0x49d); +test(0x49d,0x49c); +test(0x49e,0x49f); +test(0x49f,0x49e); +test(0x4a0,0x4a1); +test(0x4a1,0x4a0); +test(0x4a2,0x4a3); +test(0x4a3,0x4a2); +test(0x4a4,0x4a5); +test(0x4a5,0x4a4); +test(0x4a6,0x4a7); +test(0x4a7,0x4a6); +test(0x4a8,0x4a9); +test(0x4a9,0x4a8); +test(0x4aa,0x4ab); +test(0x4ab,0x4aa); +test(0x4ac,0x4ad); +test(0x4ad,0x4ac); +test(0x4ae,0x4af); +test(0x4af,0x4ae); +test(0x4b0,0x4b1); +test(0x4b1,0x4b0); +test(0x4b2,0x4b3); +test(0x4b3,0x4b2); +test(0x4b4,0x4b5); +test(0x4b5,0x4b4); +test(0x4b6,0x4b7); +test(0x4b7,0x4b6); 
+test(0x4b8,0x4b9); +test(0x4b9,0x4b8); +test(0x4ba,0x4bb); +test(0x4bb,0x4ba); +test(0x4bc,0x4bd); +test(0x4bd,0x4bc); +test(0x4be,0x4bf); +test(0x4bf,0x4be); +test(0x4c0,0x4cf); +test(0x4c1,0x4c2); +test(0x4c2,0x4c1); +test(0x4c3,0x4c4); +test(0x4c4,0x4c3); +test(0x4c5,0x4c6); +test(0x4c6,0x4c5); +test(0x4c7,0x4c8); +test(0x4c8,0x4c7); +test(0x4c9,0x4ca); +test(0x4ca,0x4c9); +test(0x4cb,0x4cc); +test(0x4cc,0x4cb); +test(0x4cd,0x4ce); +test(0x4ce,0x4cd); +test(0x4cf,0x4c0); +test(0x4d0,0x4d1); +test(0x4d1,0x4d0); +test(0x4d2,0x4d3); +test(0x4d3,0x4d2); +test(0x4d4,0x4d5); +test(0x4d5,0x4d4); +test(0x4d6,0x4d7); +test(0x4d7,0x4d6); +test(0x4d8,0x4d9); +test(0x4d9,0x4d8); +test(0x4da,0x4db); +test(0x4db,0x4da); +test(0x4dc,0x4dd); +test(0x4dd,0x4dc); +test(0x4de,0x4df); +test(0x4df,0x4de); +test(0x4e0,0x4e1); +test(0x4e1,0x4e0); +test(0x4e2,0x4e3); +test(0x4e3,0x4e2); +test(0x4e4,0x4e5); +test(0x4e5,0x4e4); +test(0x4e6,0x4e7); +test(0x4e7,0x4e6); +test(0x4e8,0x4e9); +test(0x4e9,0x4e8); +test(0x4ea,0x4eb); +test(0x4eb,0x4ea); +test(0x4ec,0x4ed); +test(0x4ed,0x4ec); +test(0x4ee,0x4ef); +test(0x4ef,0x4ee); +test(0x4f0,0x4f1); +test(0x4f1,0x4f0); +test(0x4f2,0x4f3); +test(0x4f3,0x4f2); +test(0x4f4,0x4f5); +test(0x4f5,0x4f4); +test(0x4f6,0x4f7); +test(0x4f7,0x4f6); +test(0x4f8,0x4f9); +test(0x4f9,0x4f8); +test(0x4fa,0x4fb); +test(0x4fb,0x4fa); +test(0x4fc,0x4fd); +test(0x4fd,0x4fc); +test(0x4fe,0x4ff); +test(0x4ff,0x4fe); +test(0x500,0x501); +test(0x501,0x500); +test(0x502,0x503); +test(0x503,0x502); +test(0x504,0x505); +test(0x505,0x504); +test(0x506,0x507); +test(0x507,0x506); +test(0x508,0x509); +test(0x509,0x508); +test(0x50a,0x50b); +test(0x50b,0x50a); +test(0x50c,0x50d); +test(0x50d,0x50c); +test(0x50e,0x50f); +test(0x50f,0x50e); +test(0x510,0x511); +test(0x511,0x510); +test(0x512,0x513); +test(0x513,0x512); +test(0x514,0x515); +test(0x515,0x514); +test(0x516,0x517); +test(0x517,0x516); +test(0x518,0x519); +test(0x519,0x518); +test(0x51a,0x51b); +test(0x51b,0x51a); 
+test(0x51c,0x51d); +test(0x51d,0x51c); +test(0x51e,0x51f); +test(0x51f,0x51e); +test(0x520,0x521); +test(0x521,0x520); +test(0x522,0x523); +test(0x523,0x522); +test(0x524,0x525); +test(0x525,0x524); +test(0x526,0x527); +test(0x527,0x526); +test(0x528,0x529); +test(0x529,0x528); +test(0x52a,0x52b); +test(0x52b,0x52a); +test(0x52c,0x52d); +test(0x52d,0x52c); +test(0x52e,0x52f); +test(0x52f,0x52e); +test(0x531,0x561); +test(0x532,0x562); +test(0x533,0x563); +test(0x534,0x564); +test(0x535,0x565); +test(0x536,0x566); +test(0x537,0x567); +test(0x538,0x568); +test(0x539,0x569); +test(0x53a,0x56a); +test(0x53b,0x56b); +test(0x53c,0x56c); +test(0x53d,0x56d); +test(0x53e,0x56e); +test(0x53f,0x56f); +test(0x540,0x570); +test(0x541,0x571); +test(0x542,0x572); +test(0x543,0x573); +test(0x544,0x574); +test(0x545,0x575); +test(0x546,0x576); +test(0x547,0x577); +test(0x548,0x578); +test(0x549,0x579); +test(0x54a,0x57a); +test(0x54b,0x57b); +test(0x54c,0x57c); +test(0x54d,0x57d); +test(0x54e,0x57e); +test(0x54f,0x57f); +test(0x550,0x580); +test(0x551,0x581); +test(0x552,0x582); +test(0x553,0x583); +test(0x554,0x584); +test(0x555,0x585); +test(0x556,0x586); +test(0x561,0x531); +test(0x562,0x532); +test(0x563,0x533); +test(0x564,0x534); +test(0x565,0x535); +test(0x566,0x536); +test(0x567,0x537); +test(0x568,0x538); +test(0x569,0x539); +test(0x56a,0x53a); +test(0x56b,0x53b); +test(0x56c,0x53c); +test(0x56d,0x53d); +test(0x56e,0x53e); +test(0x56f,0x53f); +test(0x570,0x540); +test(0x571,0x541); +test(0x572,0x542); +test(0x573,0x543); +test(0x574,0x544); +test(0x575,0x545); +test(0x576,0x546); +test(0x577,0x547); +test(0x578,0x548); +test(0x579,0x549); +test(0x57a,0x54a); +test(0x57b,0x54b); +test(0x57c,0x54c); +test(0x57d,0x54d); +test(0x57e,0x54e); +test(0x57f,0x54f); +test(0x580,0x550); +test(0x581,0x551); +test(0x582,0x552); +test(0x583,0x553); +test(0x584,0x554); +test(0x585,0x555); +test(0x586,0x556); +test(0x10a0,0x2d00); +test(0x10a1,0x2d01); +test(0x10a2,0x2d02); 
+test(0x10a3,0x2d03); +test(0x10a4,0x2d04); +test(0x10a5,0x2d05); +test(0x10a6,0x2d06); +test(0x10a7,0x2d07); +test(0x10a8,0x2d08); +test(0x10a9,0x2d09); +test(0x10aa,0x2d0a); +test(0x10ab,0x2d0b); +test(0x10ac,0x2d0c); +test(0x10ad,0x2d0d); +test(0x10ae,0x2d0e); +test(0x10af,0x2d0f); +test(0x10b0,0x2d10); +test(0x10b1,0x2d11); +test(0x10b2,0x2d12); +test(0x10b3,0x2d13); +test(0x10b4,0x2d14); +test(0x10b5,0x2d15); +test(0x10b6,0x2d16); +test(0x10b7,0x2d17); +test(0x10b8,0x2d18); +test(0x10b9,0x2d19); +test(0x10ba,0x2d1a); +test(0x10bb,0x2d1b); +test(0x10bc,0x2d1c); +test(0x10bd,0x2d1d); +test(0x10be,0x2d1e); +test(0x10bf,0x2d1f); +test(0x10c0,0x2d20); +test(0x10c1,0x2d21); +test(0x10c2,0x2d22); +test(0x10c3,0x2d23); +test(0x10c4,0x2d24); +test(0x10c5,0x2d25); +test(0x10c7,0x2d27); +test(0x10cd,0x2d2d); +test(0x13a0,0xab70); +test(0x13a1,0xab71); +test(0x13a2,0xab72); +test(0x13a3,0xab73); +test(0x13a4,0xab74); +test(0x13a5,0xab75); +test(0x13a6,0xab76); +test(0x13a7,0xab77); +test(0x13a8,0xab78); +test(0x13a9,0xab79); +test(0x13aa,0xab7a); +test(0x13ab,0xab7b); +test(0x13ac,0xab7c); +test(0x13ad,0xab7d); +test(0x13ae,0xab7e); +test(0x13af,0xab7f); +test(0x13b0,0xab80); +test(0x13b1,0xab81); +test(0x13b2,0xab82); +test(0x13b3,0xab83); +test(0x13b4,0xab84); +test(0x13b5,0xab85); +test(0x13b6,0xab86); +test(0x13b7,0xab87); +test(0x13b8,0xab88); +test(0x13b9,0xab89); +test(0x13ba,0xab8a); +test(0x13bb,0xab8b); +test(0x13bc,0xab8c); +test(0x13bd,0xab8d); +test(0x13be,0xab8e); +test(0x13bf,0xab8f); +test(0x13c0,0xab90); +test(0x13c1,0xab91); +test(0x13c2,0xab92); +test(0x13c3,0xab93); +test(0x13c4,0xab94); +test(0x13c5,0xab95); +test(0x13c6,0xab96); +test(0x13c7,0xab97); +test(0x13c8,0xab98); +test(0x13c9,0xab99); +test(0x13ca,0xab9a); +test(0x13cb,0xab9b); +test(0x13cc,0xab9c); +test(0x13cd,0xab9d); +test(0x13ce,0xab9e); +test(0x13cf,0xab9f); +test(0x13d0,0xaba0); +test(0x13d1,0xaba1); +test(0x13d2,0xaba2); +test(0x13d3,0xaba3); +test(0x13d4,0xaba4); 
+test(0x13d5,0xaba5); +test(0x13d6,0xaba6); +test(0x13d7,0xaba7); +test(0x13d8,0xaba8); +test(0x13d9,0xaba9); +test(0x13da,0xabaa); +test(0x13db,0xabab); +test(0x13dc,0xabac); +test(0x13dd,0xabad); +test(0x13de,0xabae); +test(0x13df,0xabaf); +test(0x13e0,0xabb0); +test(0x13e1,0xabb1); +test(0x13e2,0xabb2); +test(0x13e3,0xabb3); +test(0x13e4,0xabb4); +test(0x13e5,0xabb5); +test(0x13e6,0xabb6); +test(0x13e7,0xabb7); +test(0x13e8,0xabb8); +test(0x13e9,0xabb9); +test(0x13ea,0xabba); +test(0x13eb,0xabbb); +test(0x13ec,0xabbc); +test(0x13ed,0xabbd); +test(0x13ee,0xabbe); +test(0x13ef,0xabbf); +test(0x13f0,0x13f8); +test(0x13f1,0x13f9); +test(0x13f2,0x13fa); +test(0x13f3,0x13fb); +test(0x13f4,0x13fc); +test(0x13f5,0x13fd); +test(0x13f8,0x13f0); +test(0x13f9,0x13f1); +test(0x13fa,0x13f2); +test(0x13fb,0x13f3); +test(0x13fc,0x13f4); +test(0x13fd,0x13f5); +test(0x1d79,0xa77d); +test(0x1d7d,0x2c63); +test(0x1e00,0x1e01); +test(0x1e01,0x1e00); +test(0x1e02,0x1e03); +test(0x1e03,0x1e02); +test(0x1e04,0x1e05); +test(0x1e05,0x1e04); +test(0x1e06,0x1e07); +test(0x1e07,0x1e06); +test(0x1e08,0x1e09); +test(0x1e09,0x1e08); +test(0x1e0a,0x1e0b); +test(0x1e0b,0x1e0a); +test(0x1e0c,0x1e0d); +test(0x1e0d,0x1e0c); +test(0x1e0e,0x1e0f); +test(0x1e0f,0x1e0e); +test(0x1e10,0x1e11); +test(0x1e11,0x1e10); +test(0x1e12,0x1e13); +test(0x1e13,0x1e12); +test(0x1e14,0x1e15); +test(0x1e15,0x1e14); +test(0x1e16,0x1e17); +test(0x1e17,0x1e16); +test(0x1e18,0x1e19); +test(0x1e19,0x1e18); +test(0x1e1a,0x1e1b); +test(0x1e1b,0x1e1a); +test(0x1e1c,0x1e1d); +test(0x1e1d,0x1e1c); +test(0x1e1e,0x1e1f); +test(0x1e1f,0x1e1e); +test(0x1e20,0x1e21); +test(0x1e21,0x1e20); +test(0x1e22,0x1e23); +test(0x1e23,0x1e22); +test(0x1e24,0x1e25); +test(0x1e25,0x1e24); +test(0x1e26,0x1e27); +test(0x1e27,0x1e26); +test(0x1e28,0x1e29); +test(0x1e29,0x1e28); +test(0x1e2a,0x1e2b); +test(0x1e2b,0x1e2a); +test(0x1e2c,0x1e2d); +test(0x1e2d,0x1e2c); +test(0x1e2e,0x1e2f); +test(0x1e2f,0x1e2e); +test(0x1e30,0x1e31); 
+test(0x1e31,0x1e30); +test(0x1e32,0x1e33); +test(0x1e33,0x1e32); +test(0x1e34,0x1e35); +test(0x1e35,0x1e34); +test(0x1e36,0x1e37); +test(0x1e37,0x1e36); +test(0x1e38,0x1e39); +test(0x1e39,0x1e38); +test(0x1e3a,0x1e3b); +test(0x1e3b,0x1e3a); +test(0x1e3c,0x1e3d); +test(0x1e3d,0x1e3c); +test(0x1e3e,0x1e3f); +test(0x1e3f,0x1e3e); +test(0x1e40,0x1e41); +test(0x1e41,0x1e40); +test(0x1e42,0x1e43); +test(0x1e43,0x1e42); +test(0x1e44,0x1e45); +test(0x1e45,0x1e44); +test(0x1e46,0x1e47); +test(0x1e47,0x1e46); +test(0x1e48,0x1e49); +test(0x1e49,0x1e48); +test(0x1e4a,0x1e4b); +test(0x1e4b,0x1e4a); +test(0x1e4c,0x1e4d); +test(0x1e4d,0x1e4c); +test(0x1e4e,0x1e4f); +test(0x1e4f,0x1e4e); +test(0x1e50,0x1e51); +test(0x1e51,0x1e50); +test(0x1e52,0x1e53); +test(0x1e53,0x1e52); +test(0x1e54,0x1e55); +test(0x1e55,0x1e54); +test(0x1e56,0x1e57); +test(0x1e57,0x1e56); +test(0x1e58,0x1e59); +test(0x1e59,0x1e58); +test(0x1e5a,0x1e5b); +test(0x1e5b,0x1e5a); +test(0x1e5c,0x1e5d); +test(0x1e5d,0x1e5c); +test(0x1e5e,0x1e5f); +test(0x1e5f,0x1e5e); +test(0x1e60,0x1e61,0x1e9b); +test(0x1e61,0x1e60,0x1e9b); +test(0x1e62,0x1e63); +test(0x1e63,0x1e62); +test(0x1e64,0x1e65); +test(0x1e65,0x1e64); +test(0x1e66,0x1e67); +test(0x1e67,0x1e66); +test(0x1e68,0x1e69); +test(0x1e69,0x1e68); +test(0x1e6a,0x1e6b); +test(0x1e6b,0x1e6a); +test(0x1e6c,0x1e6d); +test(0x1e6d,0x1e6c); +test(0x1e6e,0x1e6f); +test(0x1e6f,0x1e6e); +test(0x1e70,0x1e71); +test(0x1e71,0x1e70); +test(0x1e72,0x1e73); +test(0x1e73,0x1e72); +test(0x1e74,0x1e75); +test(0x1e75,0x1e74); +test(0x1e76,0x1e77); +test(0x1e77,0x1e76); +test(0x1e78,0x1e79); +test(0x1e79,0x1e78); +test(0x1e7a,0x1e7b); +test(0x1e7b,0x1e7a); +test(0x1e7c,0x1e7d); +test(0x1e7d,0x1e7c); +test(0x1e7e,0x1e7f); +test(0x1e7f,0x1e7e); +test(0x1e80,0x1e81); +test(0x1e81,0x1e80); +test(0x1e82,0x1e83); +test(0x1e83,0x1e82); +test(0x1e84,0x1e85); +test(0x1e85,0x1e84); +test(0x1e86,0x1e87); +test(0x1e87,0x1e86); +test(0x1e88,0x1e89); +test(0x1e89,0x1e88); +test(0x1e8a,0x1e8b); 
+test(0x1e8b,0x1e8a); +test(0x1e8c,0x1e8d); +test(0x1e8d,0x1e8c); +test(0x1e8e,0x1e8f); +test(0x1e8f,0x1e8e); +test(0x1e90,0x1e91); +test(0x1e91,0x1e90); +test(0x1e92,0x1e93); +test(0x1e93,0x1e92); +test(0x1e94,0x1e95); +test(0x1e95,0x1e94); +test(0x1e9b,0x1e61,0x1e60); +test(0x1e9e,0xdf); +test(0x1ea0,0x1ea1); +test(0x1ea1,0x1ea0); +test(0x1ea2,0x1ea3); +test(0x1ea3,0x1ea2); +test(0x1ea4,0x1ea5); +test(0x1ea5,0x1ea4); +test(0x1ea6,0x1ea7); +test(0x1ea7,0x1ea6); +test(0x1ea8,0x1ea9); +test(0x1ea9,0x1ea8); +test(0x1eaa,0x1eab); +test(0x1eab,0x1eaa); +test(0x1eac,0x1ead); +test(0x1ead,0x1eac); +test(0x1eae,0x1eaf); +test(0x1eaf,0x1eae); +test(0x1eb0,0x1eb1); +test(0x1eb1,0x1eb0); +test(0x1eb2,0x1eb3); +test(0x1eb3,0x1eb2); +test(0x1eb4,0x1eb5); +test(0x1eb5,0x1eb4); +test(0x1eb6,0x1eb7); +test(0x1eb7,0x1eb6); +test(0x1eb8,0x1eb9); +test(0x1eb9,0x1eb8); +test(0x1eba,0x1ebb); +test(0x1ebb,0x1eba); +test(0x1ebc,0x1ebd); +test(0x1ebd,0x1ebc); +test(0x1ebe,0x1ebf); +test(0x1ebf,0x1ebe); +test(0x1ec0,0x1ec1); +test(0x1ec1,0x1ec0); +test(0x1ec2,0x1ec3); +test(0x1ec3,0x1ec2); +test(0x1ec4,0x1ec5); +test(0x1ec5,0x1ec4); +test(0x1ec6,0x1ec7); +test(0x1ec7,0x1ec6); +test(0x1ec8,0x1ec9); +test(0x1ec9,0x1ec8); +test(0x1eca,0x1ecb); +test(0x1ecb,0x1eca); +test(0x1ecc,0x1ecd); +test(0x1ecd,0x1ecc); +test(0x1ece,0x1ecf); +test(0x1ecf,0x1ece); +test(0x1ed0,0x1ed1); +test(0x1ed1,0x1ed0); +test(0x1ed2,0x1ed3); +test(0x1ed3,0x1ed2); +test(0x1ed4,0x1ed5); +test(0x1ed5,0x1ed4); +test(0x1ed6,0x1ed7); +test(0x1ed7,0x1ed6); +test(0x1ed8,0x1ed9); +test(0x1ed9,0x1ed8); +test(0x1eda,0x1edb); +test(0x1edb,0x1eda); +test(0x1edc,0x1edd); +test(0x1edd,0x1edc); +test(0x1ede,0x1edf); +test(0x1edf,0x1ede); +test(0x1ee0,0x1ee1); +test(0x1ee1,0x1ee0); +test(0x1ee2,0x1ee3); +test(0x1ee3,0x1ee2); +test(0x1ee4,0x1ee5); +test(0x1ee5,0x1ee4); +test(0x1ee6,0x1ee7); +test(0x1ee7,0x1ee6); +test(0x1ee8,0x1ee9); +test(0x1ee9,0x1ee8); +test(0x1eea,0x1eeb); +test(0x1eeb,0x1eea); +test(0x1eec,0x1eed); 
+test(0x1eed,0x1eec); +test(0x1eee,0x1eef); +test(0x1eef,0x1eee); +test(0x1ef0,0x1ef1); +test(0x1ef1,0x1ef0); +test(0x1ef2,0x1ef3); +test(0x1ef3,0x1ef2); +test(0x1ef4,0x1ef5); +test(0x1ef5,0x1ef4); +test(0x1ef6,0x1ef7); +test(0x1ef7,0x1ef6); +test(0x1ef8,0x1ef9); +test(0x1ef9,0x1ef8); +test(0x1efa,0x1efb); +test(0x1efb,0x1efa); +test(0x1efc,0x1efd); +test(0x1efd,0x1efc); +test(0x1efe,0x1eff); +test(0x1eff,0x1efe); +test(0x1f00,0x1f08); +test(0x1f01,0x1f09); +test(0x1f02,0x1f0a); +test(0x1f03,0x1f0b); +test(0x1f04,0x1f0c); +test(0x1f05,0x1f0d); +test(0x1f06,0x1f0e); +test(0x1f07,0x1f0f); +test(0x1f08,0x1f00); +test(0x1f09,0x1f01); +test(0x1f0a,0x1f02); +test(0x1f0b,0x1f03); +test(0x1f0c,0x1f04); +test(0x1f0d,0x1f05); +test(0x1f0e,0x1f06); +test(0x1f0f,0x1f07); +test(0x1f10,0x1f18); +test(0x1f11,0x1f19); +test(0x1f12,0x1f1a); +test(0x1f13,0x1f1b); +test(0x1f14,0x1f1c); +test(0x1f15,0x1f1d); +test(0x1f18,0x1f10); +test(0x1f19,0x1f11); +test(0x1f1a,0x1f12); +test(0x1f1b,0x1f13); +test(0x1f1c,0x1f14); +test(0x1f1d,0x1f15); +test(0x1f20,0x1f28); +test(0x1f21,0x1f29); +test(0x1f22,0x1f2a); +test(0x1f23,0x1f2b); +test(0x1f24,0x1f2c); +test(0x1f25,0x1f2d); +test(0x1f26,0x1f2e); +test(0x1f27,0x1f2f); +test(0x1f28,0x1f20); +test(0x1f29,0x1f21); +test(0x1f2a,0x1f22); +test(0x1f2b,0x1f23); +test(0x1f2c,0x1f24); +test(0x1f2d,0x1f25); +test(0x1f2e,0x1f26); +test(0x1f2f,0x1f27); +test(0x1f30,0x1f38); +test(0x1f31,0x1f39); +test(0x1f32,0x1f3a); +test(0x1f33,0x1f3b); +test(0x1f34,0x1f3c); +test(0x1f35,0x1f3d); +test(0x1f36,0x1f3e); +test(0x1f37,0x1f3f); +test(0x1f38,0x1f30); +test(0x1f39,0x1f31); +test(0x1f3a,0x1f32); +test(0x1f3b,0x1f33); +test(0x1f3c,0x1f34); +test(0x1f3d,0x1f35); +test(0x1f3e,0x1f36); +test(0x1f3f,0x1f37); +test(0x1f40,0x1f48); +test(0x1f41,0x1f49); +test(0x1f42,0x1f4a); +test(0x1f43,0x1f4b); +test(0x1f44,0x1f4c); +test(0x1f45,0x1f4d); +test(0x1f48,0x1f40); +test(0x1f49,0x1f41); +test(0x1f4a,0x1f42); +test(0x1f4b,0x1f43); +test(0x1f4c,0x1f44); 
+test(0x1f4d,0x1f45); +test(0x1f51,0x1f59); +test(0x1f53,0x1f5b); +test(0x1f55,0x1f5d); +test(0x1f57,0x1f5f); +test(0x1f59,0x1f51); +test(0x1f5b,0x1f53); +test(0x1f5d,0x1f55); +test(0x1f5f,0x1f57); +test(0x1f60,0x1f68); +test(0x1f61,0x1f69); +test(0x1f62,0x1f6a); +test(0x1f63,0x1f6b); +test(0x1f64,0x1f6c); +test(0x1f65,0x1f6d); +test(0x1f66,0x1f6e); +test(0x1f67,0x1f6f); +test(0x1f68,0x1f60); +test(0x1f69,0x1f61); +test(0x1f6a,0x1f62); +test(0x1f6b,0x1f63); +test(0x1f6c,0x1f64); +test(0x1f6d,0x1f65); +test(0x1f6e,0x1f66); +test(0x1f6f,0x1f67); +test(0x1f70,0x1fba); +test(0x1f71,0x1fbb); +test(0x1f72,0x1fc8); +test(0x1f73,0x1fc9); +test(0x1f74,0x1fca); +test(0x1f75,0x1fcb); +test(0x1f76,0x1fda); +test(0x1f77,0x1fdb); +test(0x1f78,0x1ff8); +test(0x1f79,0x1ff9); +test(0x1f7a,0x1fea); +test(0x1f7b,0x1feb); +test(0x1f7c,0x1ffa); +test(0x1f7d,0x1ffb); +test(0x1f80,0x1f88); +test(0x1f81,0x1f89); +test(0x1f82,0x1f8a); +test(0x1f83,0x1f8b); +test(0x1f84,0x1f8c); +test(0x1f85,0x1f8d); +test(0x1f86,0x1f8e); +test(0x1f87,0x1f8f); +test(0x1f88,0x1f80); +test(0x1f89,0x1f81); +test(0x1f8a,0x1f82); +test(0x1f8b,0x1f83); +test(0x1f8c,0x1f84); +test(0x1f8d,0x1f85); +test(0x1f8e,0x1f86); +test(0x1f8f,0x1f87); +test(0x1f90,0x1f98); +test(0x1f91,0x1f99); +test(0x1f92,0x1f9a); +test(0x1f93,0x1f9b); +test(0x1f94,0x1f9c); +test(0x1f95,0x1f9d); +test(0x1f96,0x1f9e); +test(0x1f97,0x1f9f); +test(0x1f98,0x1f90); +test(0x1f99,0x1f91); +test(0x1f9a,0x1f92); +test(0x1f9b,0x1f93); +test(0x1f9c,0x1f94); +test(0x1f9d,0x1f95); +test(0x1f9e,0x1f96); +test(0x1f9f,0x1f97); +test(0x1fa0,0x1fa8); +test(0x1fa1,0x1fa9); +test(0x1fa2,0x1faa); +test(0x1fa3,0x1fab); +test(0x1fa4,0x1fac); +test(0x1fa5,0x1fad); +test(0x1fa6,0x1fae); +test(0x1fa7,0x1faf); +test(0x1fa8,0x1fa0); +test(0x1fa9,0x1fa1); +test(0x1faa,0x1fa2); +test(0x1fab,0x1fa3); +test(0x1fac,0x1fa4); +test(0x1fad,0x1fa5); +test(0x1fae,0x1fa6); +test(0x1faf,0x1fa7); +test(0x1fb0,0x1fb8); +test(0x1fb1,0x1fb9); +test(0x1fb3,0x1fbc); 
+test(0x1fb8,0x1fb0); +test(0x1fb9,0x1fb1); +test(0x1fba,0x1f70); +test(0x1fbb,0x1f71); +test(0x1fbc,0x1fb3); +test(0x1fbe,0x3b9,0x345,0x399); +test(0x1fc3,0x1fcc); +test(0x1fc8,0x1f72); +test(0x1fc9,0x1f73); +test(0x1fca,0x1f74); +test(0x1fcb,0x1f75); +test(0x1fcc,0x1fc3); +test(0x1fd0,0x1fd8); +test(0x1fd1,0x1fd9); +test(0x1fd8,0x1fd0); +test(0x1fd9,0x1fd1); +test(0x1fda,0x1f76); +test(0x1fdb,0x1f77); +test(0x1fe0,0x1fe8); +test(0x1fe1,0x1fe9); +test(0x1fe5,0x1fec); +test(0x1fe8,0x1fe0); +test(0x1fe9,0x1fe1); +test(0x1fea,0x1f7a); +test(0x1feb,0x1f7b); +test(0x1fec,0x1fe5); +test(0x1ff3,0x1ffc); +test(0x1ff8,0x1f78); +test(0x1ff9,0x1f79); +test(0x1ffa,0x1f7c); +test(0x1ffb,0x1f7d); +test(0x1ffc,0x1ff3); +test(0x2126,0x3c9,0x3a9); +test(0x212a,0x6b,0x4b); +test(0x212b,0xe5,0xc5); +test(0x2132,0x214e); +test(0x214e,0x2132); +test(0x2160,0x2170); +test(0x2161,0x2171); +test(0x2162,0x2172); +test(0x2163,0x2173); +test(0x2164,0x2174); +test(0x2165,0x2175); +test(0x2166,0x2176); +test(0x2167,0x2177); +test(0x2168,0x2178); +test(0x2169,0x2179); +test(0x216a,0x217a); +test(0x216b,0x217b); +test(0x216c,0x217c); +test(0x216d,0x217d); +test(0x216e,0x217e); +test(0x216f,0x217f); +test(0x2170,0x2160); +test(0x2171,0x2161); +test(0x2172,0x2162); +test(0x2173,0x2163); +test(0x2174,0x2164); +test(0x2175,0x2165); +test(0x2176,0x2166); +test(0x2177,0x2167); +test(0x2178,0x2168); +test(0x2179,0x2169); +test(0x217a,0x216a); +test(0x217b,0x216b); +test(0x217c,0x216c); +test(0x217d,0x216d); +test(0x217e,0x216e); +test(0x217f,0x216f); +test(0x2183,0x2184); +test(0x2184,0x2183); +test(0x24b6,0x24d0); +test(0x24b7,0x24d1); +test(0x24b8,0x24d2); +test(0x24b9,0x24d3); +test(0x24ba,0x24d4); +test(0x24bb,0x24d5); +test(0x24bc,0x24d6); +test(0x24bd,0x24d7); +test(0x24be,0x24d8); +test(0x24bf,0x24d9); +test(0x24c0,0x24da); +test(0x24c1,0x24db); +test(0x24c2,0x24dc); +test(0x24c3,0x24dd); +test(0x24c4,0x24de); +test(0x24c5,0x24df); +test(0x24c6,0x24e0); +test(0x24c7,0x24e1); 
+test(0x24c8,0x24e2); +test(0x24c9,0x24e3); +test(0x24ca,0x24e4); +test(0x24cb,0x24e5); +test(0x24cc,0x24e6); +test(0x24cd,0x24e7); +test(0x24ce,0x24e8); +test(0x24cf,0x24e9); +test(0x24d0,0x24b6); +test(0x24d1,0x24b7); +test(0x24d2,0x24b8); +test(0x24d3,0x24b9); +test(0x24d4,0x24ba); +test(0x24d5,0x24bb); +test(0x24d6,0x24bc); +test(0x24d7,0x24bd); +test(0x24d8,0x24be); +test(0x24d9,0x24bf); +test(0x24da,0x24c0); +test(0x24db,0x24c1); +test(0x24dc,0x24c2); +test(0x24dd,0x24c3); +test(0x24de,0x24c4); +test(0x24df,0x24c5); +test(0x24e0,0x24c6); +test(0x24e1,0x24c7); +test(0x24e2,0x24c8); +test(0x24e3,0x24c9); +test(0x24e4,0x24ca); +test(0x24e5,0x24cb); +test(0x24e6,0x24cc); +test(0x24e7,0x24cd); +test(0x24e8,0x24ce); +test(0x24e9,0x24cf); +test(0x2c00,0x2c30); +test(0x2c01,0x2c31); +test(0x2c02,0x2c32); +test(0x2c03,0x2c33); +test(0x2c04,0x2c34); +test(0x2c05,0x2c35); +test(0x2c06,0x2c36); +test(0x2c07,0x2c37); +test(0x2c08,0x2c38); +test(0x2c09,0x2c39); +test(0x2c0a,0x2c3a); +test(0x2c0b,0x2c3b); +test(0x2c0c,0x2c3c); +test(0x2c0d,0x2c3d); +test(0x2c0e,0x2c3e); +test(0x2c0f,0x2c3f); +test(0x2c10,0x2c40); +test(0x2c11,0x2c41); +test(0x2c12,0x2c42); +test(0x2c13,0x2c43); +test(0x2c14,0x2c44); +test(0x2c15,0x2c45); +test(0x2c16,0x2c46); +test(0x2c17,0x2c47); +test(0x2c18,0x2c48); +test(0x2c19,0x2c49); +test(0x2c1a,0x2c4a); +test(0x2c1b,0x2c4b); +test(0x2c1c,0x2c4c); +test(0x2c1d,0x2c4d); +test(0x2c1e,0x2c4e); +test(0x2c1f,0x2c4f); +test(0x2c20,0x2c50); +test(0x2c21,0x2c51); +test(0x2c22,0x2c52); +test(0x2c23,0x2c53); +test(0x2c24,0x2c54); +test(0x2c25,0x2c55); +test(0x2c26,0x2c56); +test(0x2c27,0x2c57); +test(0x2c28,0x2c58); +test(0x2c29,0x2c59); +test(0x2c2a,0x2c5a); +test(0x2c2b,0x2c5b); +test(0x2c2c,0x2c5c); +test(0x2c2d,0x2c5d); +test(0x2c2e,0x2c5e); +test(0x2c30,0x2c00); +test(0x2c31,0x2c01); +test(0x2c32,0x2c02); +test(0x2c33,0x2c03); +test(0x2c34,0x2c04); +test(0x2c35,0x2c05); +test(0x2c36,0x2c06); +test(0x2c37,0x2c07); +test(0x2c38,0x2c08); 
+test(0x2c39,0x2c09); +test(0x2c3a,0x2c0a); +test(0x2c3b,0x2c0b); +test(0x2c3c,0x2c0c); +test(0x2c3d,0x2c0d); +test(0x2c3e,0x2c0e); +test(0x2c3f,0x2c0f); +test(0x2c40,0x2c10); +test(0x2c41,0x2c11); +test(0x2c42,0x2c12); +test(0x2c43,0x2c13); +test(0x2c44,0x2c14); +test(0x2c45,0x2c15); +test(0x2c46,0x2c16); +test(0x2c47,0x2c17); +test(0x2c48,0x2c18); +test(0x2c49,0x2c19); +test(0x2c4a,0x2c1a); +test(0x2c4b,0x2c1b); +test(0x2c4c,0x2c1c); +test(0x2c4d,0x2c1d); +test(0x2c4e,0x2c1e); +test(0x2c4f,0x2c1f); +test(0x2c50,0x2c20); +test(0x2c51,0x2c21); +test(0x2c52,0x2c22); +test(0x2c53,0x2c23); +test(0x2c54,0x2c24); +test(0x2c55,0x2c25); +test(0x2c56,0x2c26); +test(0x2c57,0x2c27); +test(0x2c58,0x2c28); +test(0x2c59,0x2c29); +test(0x2c5a,0x2c2a); +test(0x2c5b,0x2c2b); +test(0x2c5c,0x2c2c); +test(0x2c5d,0x2c2d); +test(0x2c5e,0x2c2e); +test(0x2c60,0x2c61); +test(0x2c61,0x2c60); +test(0x2c62,0x26b); +test(0x2c63,0x1d7d); +test(0x2c64,0x27d); +test(0x2c65,0x23a); +test(0x2c66,0x23e); +test(0x2c67,0x2c68); +test(0x2c68,0x2c67); +test(0x2c69,0x2c6a); +test(0x2c6a,0x2c69); +test(0x2c6b,0x2c6c); +test(0x2c6c,0x2c6b); +test(0x2c6d,0x251); +test(0x2c6e,0x271); +test(0x2c6f,0x250); +test(0x2c70,0x252); +test(0x2c72,0x2c73); +test(0x2c73,0x2c72); +test(0x2c75,0x2c76); +test(0x2c76,0x2c75); +test(0x2c7e,0x23f); +test(0x2c7f,0x240); +test(0x2c80,0x2c81); +test(0x2c81,0x2c80); +test(0x2c82,0x2c83); +test(0x2c83,0x2c82); +test(0x2c84,0x2c85); +test(0x2c85,0x2c84); +test(0x2c86,0x2c87); +test(0x2c87,0x2c86); +test(0x2c88,0x2c89); +test(0x2c89,0x2c88); +test(0x2c8a,0x2c8b); +test(0x2c8b,0x2c8a); +test(0x2c8c,0x2c8d); +test(0x2c8d,0x2c8c); +test(0x2c8e,0x2c8f); +test(0x2c8f,0x2c8e); +test(0x2c90,0x2c91); +test(0x2c91,0x2c90); +test(0x2c92,0x2c93); +test(0x2c93,0x2c92); +test(0x2c94,0x2c95); +test(0x2c95,0x2c94); +test(0x2c96,0x2c97); +test(0x2c97,0x2c96); +test(0x2c98,0x2c99); +test(0x2c99,0x2c98); +test(0x2c9a,0x2c9b); +test(0x2c9b,0x2c9a); +test(0x2c9c,0x2c9d); +test(0x2c9d,0x2c9c); 
+test(0x2c9e,0x2c9f); +test(0x2c9f,0x2c9e); +test(0x2ca0,0x2ca1); +test(0x2ca1,0x2ca0); +test(0x2ca2,0x2ca3); +test(0x2ca3,0x2ca2); +test(0x2ca4,0x2ca5); +test(0x2ca5,0x2ca4); +test(0x2ca6,0x2ca7); +test(0x2ca7,0x2ca6); +test(0x2ca8,0x2ca9); +test(0x2ca9,0x2ca8); +test(0x2caa,0x2cab); +test(0x2cab,0x2caa); +test(0x2cac,0x2cad); +test(0x2cad,0x2cac); +test(0x2cae,0x2caf); +test(0x2caf,0x2cae); +test(0x2cb0,0x2cb1); +test(0x2cb1,0x2cb0); +test(0x2cb2,0x2cb3); +test(0x2cb3,0x2cb2); +test(0x2cb4,0x2cb5); +test(0x2cb5,0x2cb4); +test(0x2cb6,0x2cb7); +test(0x2cb7,0x2cb6); +test(0x2cb8,0x2cb9); +test(0x2cb9,0x2cb8); +test(0x2cba,0x2cbb); +test(0x2cbb,0x2cba); +test(0x2cbc,0x2cbd); +test(0x2cbd,0x2cbc); +test(0x2cbe,0x2cbf); +test(0x2cbf,0x2cbe); +test(0x2cc0,0x2cc1); +test(0x2cc1,0x2cc0); +test(0x2cc2,0x2cc3); +test(0x2cc3,0x2cc2); +test(0x2cc4,0x2cc5); +test(0x2cc5,0x2cc4); +test(0x2cc6,0x2cc7); +test(0x2cc7,0x2cc6); +test(0x2cc8,0x2cc9); +test(0x2cc9,0x2cc8); +test(0x2cca,0x2ccb); +test(0x2ccb,0x2cca); +test(0x2ccc,0x2ccd); +test(0x2ccd,0x2ccc); +test(0x2cce,0x2ccf); +test(0x2ccf,0x2cce); +test(0x2cd0,0x2cd1); +test(0x2cd1,0x2cd0); +test(0x2cd2,0x2cd3); +test(0x2cd3,0x2cd2); +test(0x2cd4,0x2cd5); +test(0x2cd5,0x2cd4); +test(0x2cd6,0x2cd7); +test(0x2cd7,0x2cd6); +test(0x2cd8,0x2cd9); +test(0x2cd9,0x2cd8); +test(0x2cda,0x2cdb); +test(0x2cdb,0x2cda); +test(0x2cdc,0x2cdd); +test(0x2cdd,0x2cdc); +test(0x2cde,0x2cdf); +test(0x2cdf,0x2cde); +test(0x2ce0,0x2ce1); +test(0x2ce1,0x2ce0); +test(0x2ce2,0x2ce3); +test(0x2ce3,0x2ce2); +test(0x2ceb,0x2cec); +test(0x2cec,0x2ceb); +test(0x2ced,0x2cee); +test(0x2cee,0x2ced); +test(0x2cf2,0x2cf3); +test(0x2cf3,0x2cf2); +test(0x2d00,0x10a0); +test(0x2d01,0x10a1); +test(0x2d02,0x10a2); +test(0x2d03,0x10a3); +test(0x2d04,0x10a4); +test(0x2d05,0x10a5); +test(0x2d06,0x10a6); +test(0x2d07,0x10a7); +test(0x2d08,0x10a8); +test(0x2d09,0x10a9); +test(0x2d0a,0x10aa); +test(0x2d0b,0x10ab); +test(0x2d0c,0x10ac); +test(0x2d0d,0x10ad); 
+test(0x2d0e,0x10ae); +test(0x2d0f,0x10af); +test(0x2d10,0x10b0); +test(0x2d11,0x10b1); +test(0x2d12,0x10b2); +test(0x2d13,0x10b3); +test(0x2d14,0x10b4); +test(0x2d15,0x10b5); +test(0x2d16,0x10b6); +test(0x2d17,0x10b7); +test(0x2d18,0x10b8); +test(0x2d19,0x10b9); +test(0x2d1a,0x10ba); +test(0x2d1b,0x10bb); +test(0x2d1c,0x10bc); +test(0x2d1d,0x10bd); +test(0x2d1e,0x10be); +test(0x2d1f,0x10bf); +test(0x2d20,0x10c0); +test(0x2d21,0x10c1); +test(0x2d22,0x10c2); +test(0x2d23,0x10c3); +test(0x2d24,0x10c4); +test(0x2d25,0x10c5); +test(0x2d27,0x10c7); +test(0x2d2d,0x10cd); +test(0xa640,0xa641); +test(0xa641,0xa640); +test(0xa642,0xa643); +test(0xa643,0xa642); +test(0xa644,0xa645); +test(0xa645,0xa644); +test(0xa646,0xa647); +test(0xa647,0xa646); +test(0xa648,0xa649); +test(0xa649,0xa648); +test(0xa64a,0xa64b); +test(0xa64b,0xa64a); +test(0xa64c,0xa64d); +test(0xa64d,0xa64c); +test(0xa64e,0xa64f); +test(0xa64f,0xa64e); +test(0xa650,0xa651); +test(0xa651,0xa650); +test(0xa652,0xa653); +test(0xa653,0xa652); +test(0xa654,0xa655); +test(0xa655,0xa654); +test(0xa656,0xa657); +test(0xa657,0xa656); +test(0xa658,0xa659); +test(0xa659,0xa658); +test(0xa65a,0xa65b); +test(0xa65b,0xa65a); +test(0xa65c,0xa65d); +test(0xa65d,0xa65c); +test(0xa65e,0xa65f); +test(0xa65f,0xa65e); +test(0xa660,0xa661); +test(0xa661,0xa660); +test(0xa662,0xa663); +test(0xa663,0xa662); +test(0xa664,0xa665); +test(0xa665,0xa664); +test(0xa666,0xa667); +test(0xa667,0xa666); +test(0xa668,0xa669); +test(0xa669,0xa668); +test(0xa66a,0xa66b); +test(0xa66b,0xa66a); +test(0xa66c,0xa66d); +test(0xa66d,0xa66c); +test(0xa680,0xa681); +test(0xa681,0xa680); +test(0xa682,0xa683); +test(0xa683,0xa682); +test(0xa684,0xa685); +test(0xa685,0xa684); +test(0xa686,0xa687); +test(0xa687,0xa686); +test(0xa688,0xa689); +test(0xa689,0xa688); +test(0xa68a,0xa68b); +test(0xa68b,0xa68a); +test(0xa68c,0xa68d); +test(0xa68d,0xa68c); +test(0xa68e,0xa68f); +test(0xa68f,0xa68e); +test(0xa690,0xa691); +test(0xa691,0xa690); 
+test(0xa692,0xa693); +test(0xa693,0xa692); +test(0xa694,0xa695); +test(0xa695,0xa694); +test(0xa696,0xa697); +test(0xa697,0xa696); +test(0xa698,0xa699); +test(0xa699,0xa698); +test(0xa69a,0xa69b); +test(0xa69b,0xa69a); +test(0xa722,0xa723); +test(0xa723,0xa722); +test(0xa724,0xa725); +test(0xa725,0xa724); +test(0xa726,0xa727); +test(0xa727,0xa726); +test(0xa728,0xa729); +test(0xa729,0xa728); +test(0xa72a,0xa72b); +test(0xa72b,0xa72a); +test(0xa72c,0xa72d); +test(0xa72d,0xa72c); +test(0xa72e,0xa72f); +test(0xa72f,0xa72e); +test(0xa732,0xa733); +test(0xa733,0xa732); +test(0xa734,0xa735); +test(0xa735,0xa734); +test(0xa736,0xa737); +test(0xa737,0xa736); +test(0xa738,0xa739); +test(0xa739,0xa738); +test(0xa73a,0xa73b); +test(0xa73b,0xa73a); +test(0xa73c,0xa73d); +test(0xa73d,0xa73c); +test(0xa73e,0xa73f); +test(0xa73f,0xa73e); +test(0xa740,0xa741); +test(0xa741,0xa740); +test(0xa742,0xa743); +test(0xa743,0xa742); +test(0xa744,0xa745); +test(0xa745,0xa744); +test(0xa746,0xa747); +test(0xa747,0xa746); +test(0xa748,0xa749); +test(0xa749,0xa748); +test(0xa74a,0xa74b); +test(0xa74b,0xa74a); +test(0xa74c,0xa74d); +test(0xa74d,0xa74c); +test(0xa74e,0xa74f); +test(0xa74f,0xa74e); +test(0xa750,0xa751); +test(0xa751,0xa750); +test(0xa752,0xa753); +test(0xa753,0xa752); +test(0xa754,0xa755); +test(0xa755,0xa754); +test(0xa756,0xa757); +test(0xa757,0xa756); +test(0xa758,0xa759); +test(0xa759,0xa758); +test(0xa75a,0xa75b); +test(0xa75b,0xa75a); +test(0xa75c,0xa75d); +test(0xa75d,0xa75c); +test(0xa75e,0xa75f); +test(0xa75f,0xa75e); +test(0xa760,0xa761); +test(0xa761,0xa760); +test(0xa762,0xa763); +test(0xa763,0xa762); +test(0xa764,0xa765); +test(0xa765,0xa764); +test(0xa766,0xa767); +test(0xa767,0xa766); +test(0xa768,0xa769); +test(0xa769,0xa768); +test(0xa76a,0xa76b); +test(0xa76b,0xa76a); +test(0xa76c,0xa76d); +test(0xa76d,0xa76c); +test(0xa76e,0xa76f); +test(0xa76f,0xa76e); +test(0xa779,0xa77a); +test(0xa77a,0xa779); +test(0xa77b,0xa77c); +test(0xa77c,0xa77b); 
+test(0xa77d,0x1d79); +test(0xa77e,0xa77f); +test(0xa77f,0xa77e); +test(0xa780,0xa781); +test(0xa781,0xa780); +test(0xa782,0xa783); +test(0xa783,0xa782); +test(0xa784,0xa785); +test(0xa785,0xa784); +test(0xa786,0xa787); +test(0xa787,0xa786); +test(0xa78b,0xa78c); +test(0xa78c,0xa78b); +test(0xa78d,0x265); +test(0xa790,0xa791); +test(0xa791,0xa790); +test(0xa792,0xa793); +test(0xa793,0xa792); +test(0xa796,0xa797); +test(0xa797,0xa796); +test(0xa798,0xa799); +test(0xa799,0xa798); +test(0xa79a,0xa79b); +test(0xa79b,0xa79a); +test(0xa79c,0xa79d); +test(0xa79d,0xa79c); +test(0xa79e,0xa79f); +test(0xa79f,0xa79e); +test(0xa7a0,0xa7a1); +test(0xa7a1,0xa7a0); +test(0xa7a2,0xa7a3); +test(0xa7a3,0xa7a2); +test(0xa7a4,0xa7a5); +test(0xa7a5,0xa7a4); +test(0xa7a6,0xa7a7); +test(0xa7a7,0xa7a6); +test(0xa7a8,0xa7a9); +test(0xa7a9,0xa7a8); +test(0xa7aa,0x266); +test(0xa7ab,0x25c); +test(0xa7ac,0x261); +test(0xa7ad,0x26c); +test(0xa7b0,0x29e); +test(0xa7b1,0x287); +test(0xa7b2,0x29d); +test(0xa7b3,0xab53); +test(0xa7b4,0xa7b5); +test(0xa7b5,0xa7b4); +test(0xa7b6,0xa7b7); +test(0xa7b7,0xa7b6); +test(0xab53,0xa7b3); +test(0xab70,0x13a0); +test(0xab71,0x13a1); +test(0xab72,0x13a2); +test(0xab73,0x13a3); +test(0xab74,0x13a4); +test(0xab75,0x13a5); +test(0xab76,0x13a6); +test(0xab77,0x13a7); +test(0xab78,0x13a8); +test(0xab79,0x13a9); +test(0xab7a,0x13aa); +test(0xab7b,0x13ab); +test(0xab7c,0x13ac); +test(0xab7d,0x13ad); +test(0xab7e,0x13ae); +test(0xab7f,0x13af); +test(0xab80,0x13b0); +test(0xab81,0x13b1); +test(0xab82,0x13b2); +test(0xab83,0x13b3); +test(0xab84,0x13b4); +test(0xab85,0x13b5); +test(0xab86,0x13b6); +test(0xab87,0x13b7); +test(0xab88,0x13b8); +test(0xab89,0x13b9); +test(0xab8a,0x13ba); +test(0xab8b,0x13bb); +test(0xab8c,0x13bc); +test(0xab8d,0x13bd); +test(0xab8e,0x13be); +test(0xab8f,0x13bf); +test(0xab90,0x13c0); +test(0xab91,0x13c1); +test(0xab92,0x13c2); +test(0xab93,0x13c3); +test(0xab94,0x13c4); +test(0xab95,0x13c5); +test(0xab96,0x13c6); +test(0xab97,0x13c7); 
+test(0xab98,0x13c8); +test(0xab99,0x13c9); +test(0xab9a,0x13ca); +test(0xab9b,0x13cb); +test(0xab9c,0x13cc); +test(0xab9d,0x13cd); +test(0xab9e,0x13ce); +test(0xab9f,0x13cf); +test(0xaba0,0x13d0); +test(0xaba1,0x13d1); +test(0xaba2,0x13d2); +test(0xaba3,0x13d3); +test(0xaba4,0x13d4); +test(0xaba5,0x13d5); +test(0xaba6,0x13d6); +test(0xaba7,0x13d7); +test(0xaba8,0x13d8); +test(0xaba9,0x13d9); +test(0xabaa,0x13da); +test(0xabab,0x13db); +test(0xabac,0x13dc); +test(0xabad,0x13dd); +test(0xabae,0x13de); +test(0xabaf,0x13df); +test(0xabb0,0x13e0); +test(0xabb1,0x13e1); +test(0xabb2,0x13e2); +test(0xabb3,0x13e3); +test(0xabb4,0x13e4); +test(0xabb5,0x13e5); +test(0xabb6,0x13e6); +test(0xabb7,0x13e7); +test(0xabb8,0x13e8); +test(0xabb9,0x13e9); +test(0xabba,0x13ea); +test(0xabbb,0x13eb); +test(0xabbc,0x13ec); +test(0xabbd,0x13ed); +test(0xabbe,0x13ee); +test(0xabbf,0x13ef); +test(0xff21,0xff41); +test(0xff22,0xff42); +test(0xff23,0xff43); +test(0xff24,0xff44); +test(0xff25,0xff45); +test(0xff26,0xff46); +test(0xff27,0xff47); +test(0xff28,0xff48); +test(0xff29,0xff49); +test(0xff2a,0xff4a); +test(0xff2b,0xff4b); +test(0xff2c,0xff4c); +test(0xff2d,0xff4d); +test(0xff2e,0xff4e); +test(0xff2f,0xff4f); +test(0xff30,0xff50); +test(0xff31,0xff51); +test(0xff32,0xff52); +test(0xff33,0xff53); +test(0xff34,0xff54); +test(0xff35,0xff55); +test(0xff36,0xff56); +test(0xff37,0xff57); +test(0xff38,0xff58); +test(0xff39,0xff59); +test(0xff3a,0xff5a); +test(0xff41,0xff21); +test(0xff42,0xff22); +test(0xff43,0xff23); +test(0xff44,0xff24); +test(0xff45,0xff25); +test(0xff46,0xff26); +test(0xff47,0xff27); +test(0xff48,0xff28); +test(0xff49,0xff29); +test(0xff4a,0xff2a); +test(0xff4b,0xff2b); +test(0xff4c,0xff2c); +test(0xff4d,0xff2d); +test(0xff4e,0xff2e); +test(0xff4f,0xff2f); +test(0xff50,0xff30); +test(0xff51,0xff31); +test(0xff52,0xff32); +test(0xff53,0xff33); +test(0xff54,0xff34); +test(0xff55,0xff35); +test(0xff56,0xff36); +test(0xff57,0xff37); +test(0xff58,0xff38); 
+test(0xff59,0xff39); +test(0xff5a,0xff3a); + +if (typeof reportCompare === "function") + reportCompare(true, true); diff --git a/js/src/vm/CaseFolding.txt b/js/src/vm/CaseFolding.txt new file mode 100644 index 000000000..0197a6c40 --- /dev/null +++ b/js/src/vm/CaseFolding.txt @@ -0,0 +1,1414 @@ +# CaseFolding-8.0.0.txt +# Date: 2015-01-13, 18:16:36 GMT [MD] +# +# Unicode Character Database +# Copyright (c) 1991-2015 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# Case Folding Properties +# +# This file is a supplement to the UnicodeData file. +# It provides a case folding mapping generated from the Unicode Character Database. +# If all characters are mapped according to the full mapping below, then +# case differences (according to UnicodeData.txt and SpecialCasing.txt) +# are eliminated. +# +# The data supports both implementations that require simple case foldings +# (where string lengths don't change), and implementations that allow full case folding +# (where string lengths may grow). Note that where they can be supported, the +# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match. +# +# All code points not listed in this file map to themselves. +# +# NOTE: case folding does not preserve normalization formats! +# +# For information on case folding, including how to have case folding +# preserve normalization formats, see Section 3.13 Default Case Algorithms in +# The Unicode Standard. +# +# ================================================================================ +# Format +# ================================================================================ +# The entries in this file are in the following machine-readable format: +# +# <code>; <status>; <mapping>; # <name> +# +# The status field is: +# C: common case folding, common mappings shared by both simple and full mappings. +# F: full case folding, mappings that cause strings to grow in length.
Multiple characters are separated by spaces. +# S: simple case folding, mappings to single characters where different from F. +# T: special case for uppercase I and dotted uppercase I +# - For non-Turkic languages, this mapping is normally not used. +# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. +# Note that the Turkic mappings do not maintain canonical equivalence without additional processing. +# See the discussions of case mapping in the Unicode Standard for more information. +# +# Usage: +# A. To do a simple case folding, use the mappings with status C + S. +# B. To do a full case folding, use the mappings with status C + F. +# +# The mappings with status T can be used or omitted depending on the desired case-folding +# behavior. (The default option is to exclude them.) +# +# ================================================================= + +# Property: Case_Folding + +# All code points not explicitly listed for Case_Folding +# have the value C for the status field, and the code point itself for the mapping field. 
+ +# ================================================================= +0041; C; 0061; # LATIN CAPITAL LETTER A +0042; C; 0062; # LATIN CAPITAL LETTER B +0043; C; 0063; # LATIN CAPITAL LETTER C +0044; C; 0064; # LATIN CAPITAL LETTER D +0045; C; 0065; # LATIN CAPITAL LETTER E +0046; C; 0066; # LATIN CAPITAL LETTER F +0047; C; 0067; # LATIN CAPITAL LETTER G +0048; C; 0068; # LATIN CAPITAL LETTER H +0049; C; 0069; # LATIN CAPITAL LETTER I +0049; T; 0131; # LATIN CAPITAL LETTER I +004A; C; 006A; # LATIN CAPITAL LETTER J +004B; C; 006B; # LATIN CAPITAL LETTER K +004C; C; 006C; # LATIN CAPITAL LETTER L +004D; C; 006D; # LATIN CAPITAL LETTER M +004E; C; 006E; # LATIN CAPITAL LETTER N +004F; C; 006F; # LATIN CAPITAL LETTER O +0050; C; 0070; # LATIN CAPITAL LETTER P +0051; C; 0071; # LATIN CAPITAL LETTER Q +0052; C; 0072; # LATIN CAPITAL LETTER R +0053; C; 0073; # LATIN CAPITAL LETTER S +0054; C; 0074; # LATIN CAPITAL LETTER T +0055; C; 0075; # LATIN CAPITAL LETTER U +0056; C; 0076; # LATIN CAPITAL LETTER V +0057; C; 0077; # LATIN CAPITAL LETTER W +0058; C; 0078; # LATIN CAPITAL LETTER X +0059; C; 0079; # LATIN CAPITAL LETTER Y +005A; C; 007A; # LATIN CAPITAL LETTER Z +00B5; C; 03BC; # MICRO SIGN +00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE +00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE +00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE +00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS +00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE +00C6; C; 00E6; # LATIN CAPITAL LETTER AE +00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA +00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE +00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE +00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS +00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE +00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE +00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH 
CIRCUMFLEX +00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS +00D0; C; 00F0; # LATIN CAPITAL LETTER ETH +00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE +00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE +00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE +00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE +00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS +00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE +00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE +00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE +00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS +00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE +00DE; C; 00FE; # LATIN CAPITAL LETTER THORN +00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S +0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON +0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE +0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK +0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE +0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX +010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE +010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON +010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON +0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE +0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON +0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE +0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE +0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK +011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON +011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX +011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE +0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE +0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA +0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE +0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE +012A; C; 012B; # LATIN 
CAPITAL LETTER I WITH MACRON +012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE +012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK +0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE +0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE +0132; C; 0133; # LATIN CAPITAL LIGATURE IJ +0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA +0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE +013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA +013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON +013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT +0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE +0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE +0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA +0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON +0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +014A; C; 014B; # LATIN CAPITAL LETTER ENG +014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON +014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE +0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0152; C; 0153; # LATIN CAPITAL LIGATURE OE +0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE +0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA +0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON +015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE +015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX +015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA +0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON +0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA +0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON +0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE +0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE +016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON +016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE +016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE +0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0172; C; 0173; # LATIN CAPITAL LETTER U 
WITH OGONEK +0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS +0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE +017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE +017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON +017F; C; 0073; # LATIN SMALL LETTER LONG S +0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK +0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR +0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX +0186; C; 0254; # LATIN CAPITAL LETTER OPEN O +0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK +0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D +018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK +018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR +018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E +018F; C; 0259; # LATIN CAPITAL LETTER SCHWA +0190; C; 025B; # LATIN CAPITAL LETTER OPEN E +0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK +0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK +0194; C; 0263; # LATIN CAPITAL LETTER GAMMA +0196; C; 0269; # LATIN CAPITAL LETTER IOTA +0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE +0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK +019C; C; 026F; # LATIN CAPITAL LETTER TURNED M +019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK +019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE +01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN +01A2; C; 01A3; # LATIN CAPITAL LETTER OI +01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK +01A6; C; 0280; # LATIN LETTER YR +01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO +01A9; C; 0283; # LATIN CAPITAL LETTER ESH +01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK +01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK +01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN +01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON +01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK +01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK +01B5; C; 01B6; # LATIN CAPITAL 
LETTER Z WITH STROKE +01B7; C; 0292; # LATIN CAPITAL LETTER EZH +01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED +01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE +01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON +01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +01C7; C; 01C9; # LATIN CAPITAL LETTER LJ +01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J +01CA; C; 01CC; # LATIN CAPITAL LETTER NJ +01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J +01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON +01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON +01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON +01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON +01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON +01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON +01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON +01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE +01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON +01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON +01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK +01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON +01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON +01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON +01F1; C; 01F3; # LATIN CAPITAL LETTER DZ +01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z +01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE +01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR +01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN +01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE +01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE +01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE +01FE; C; 01FF; # LATIN CAPITAL LETTER O 
WITH STROKE AND ACUTE +0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE +0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE +0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE +0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE +0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE +020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE +020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE +020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE +0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE +0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE +0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE +0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE +0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW +021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW +021C; C; 021D; # LATIN CAPITAL LETTER YOGH +021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON +0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +0222; C; 0223; # LATIN CAPITAL LETTER OU +0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK +0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE +0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA +022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON +022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON +022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE +0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON +0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON +023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE +023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE +023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR +023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE +0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP +0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE +0244; C; 0289; # LATIN CAPITAL LETTER U BAR +0245; C; 028C; # LATIN CAPITAL LETTER TURNED V +0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE +0248; 
C; 0249; # LATIN CAPITAL LETTER J WITH STROKE +024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL +024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE +024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE +0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI +0370; C; 0371; # GREEK CAPITAL LETTER HETA +0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI +0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA +037F; C; 03F3; # GREEK CAPITAL LETTER YOT +0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS +0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS +0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS +038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS +038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS +038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS +038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS +0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA +0392; C; 03B2; # GREEK CAPITAL LETTER BETA +0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA +0394; C; 03B4; # GREEK CAPITAL LETTER DELTA +0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON +0396; C; 03B6; # GREEK CAPITAL LETTER ZETA +0397; C; 03B7; # GREEK CAPITAL LETTER ETA +0398; C; 03B8; # GREEK CAPITAL LETTER THETA +0399; C; 03B9; # GREEK CAPITAL LETTER IOTA +039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA +039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA +039C; C; 03BC; # GREEK CAPITAL LETTER MU +039D; C; 03BD; # GREEK CAPITAL LETTER NU +039E; C; 03BE; # GREEK CAPITAL LETTER XI +039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON +03A0; C; 03C0; # GREEK CAPITAL LETTER PI +03A1; C; 03C1; # GREEK CAPITAL LETTER RHO +03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA +03A4; C; 03C4; # GREEK CAPITAL LETTER TAU +03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON +03A6; C; 03C6; # GREEK CAPITAL LETTER PHI +03A7; C; 03C7; # GREEK CAPITAL LETTER CHI +03A8; C; 03C8; # GREEK CAPITAL LETTER PSI +03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA +03AA; 
C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA +03CF; C; 03D7; # GREEK CAPITAL KAI SYMBOL +03D0; C; 03B2; # GREEK BETA SYMBOL +03D1; C; 03B8; # GREEK THETA SYMBOL +03D5; C; 03C6; # GREEK PHI SYMBOL +03D6; C; 03C0; # GREEK PI SYMBOL +03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA +03DA; C; 03DB; # GREEK LETTER STIGMA +03DC; C; 03DD; # GREEK LETTER DIGAMMA +03DE; C; 03DF; # GREEK LETTER KOPPA +03E0; C; 03E1; # GREEK LETTER SAMPI +03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI +03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI +03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI +03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI +03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA +03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA +03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI +03F0; C; 03BA; # GREEK KAPPA SYMBOL +03F1; C; 03C1; # GREEK RHO SYMBOL +03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL +03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL +03F7; C; 03F8; # GREEK CAPITAL LETTER SHO +03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL +03FA; C; 03FB; # GREEK CAPITAL LETTER SAN +03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL +03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL +03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL +0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE +0401; C; 0451; # CYRILLIC CAPITAL LETTER IO +0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE +0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE +0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE +0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE +0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0407; C; 0457; # CYRILLIC CAPITAL LETTER YI +0408; C; 0458; # CYRILLIC CAPITAL LETTER JE +0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE +040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE +040B; C; 045B; # CYRILLIC 
CAPITAL LETTER TSHE +040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE +040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE +040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U +040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE +0410; C; 0430; # CYRILLIC CAPITAL LETTER A +0411; C; 0431; # CYRILLIC CAPITAL LETTER BE +0412; C; 0432; # CYRILLIC CAPITAL LETTER VE +0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE +0414; C; 0434; # CYRILLIC CAPITAL LETTER DE +0415; C; 0435; # CYRILLIC CAPITAL LETTER IE +0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE +0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE +0418; C; 0438; # CYRILLIC CAPITAL LETTER I +0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I +041A; C; 043A; # CYRILLIC CAPITAL LETTER KA +041B; C; 043B; # CYRILLIC CAPITAL LETTER EL +041C; C; 043C; # CYRILLIC CAPITAL LETTER EM +041D; C; 043D; # CYRILLIC CAPITAL LETTER EN +041E; C; 043E; # CYRILLIC CAPITAL LETTER O +041F; C; 043F; # CYRILLIC CAPITAL LETTER PE +0420; C; 0440; # CYRILLIC CAPITAL LETTER ER +0421; C; 0441; # CYRILLIC CAPITAL LETTER ES +0422; C; 0442; # CYRILLIC CAPITAL LETTER TE +0423; C; 0443; # CYRILLIC CAPITAL LETTER U +0424; C; 0444; # CYRILLIC CAPITAL LETTER EF +0425; C; 0445; # CYRILLIC CAPITAL LETTER HA +0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE +0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE +0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA +0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA +042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN +042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU +042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN +042D; C; 044D; # CYRILLIC CAPITAL LETTER E +042E; C; 044E; # CYRILLIC CAPITAL LETTER YU +042F; C; 044F; # CYRILLIC CAPITAL LETTER YA +0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA +0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT +0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E +0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS +0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS 
+046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI +0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI +0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA +0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA +0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0478; C; 0479; # CYRILLIC CAPITAL LETTER UK +047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA +047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +047E; C; 047F; # CYRILLIC CAPITAL LETTER OT +0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA +048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL +048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN +048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK +0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER +049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE +04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA +04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE +04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA +04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER +04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER +04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U +04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER +04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE +04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +04BA; C; 
04BB; # CYRILLIC CAPITAL LETTER SHHA +04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE +04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA +04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE +04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK +04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL +04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK +04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL +04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL +04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE +04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE +04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE +04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA +04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE +04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON +04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS +04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O +04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS +04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON +04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER +04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK +04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK +04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE +0500; 
C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE +0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE +0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE +0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE +0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE +050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE +050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE +050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE +0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE +0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK +0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA +0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA +0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE +051A; C; 051B; # CYRILLIC CAPITAL LETTER QA +051C; C; 051D; # CYRILLIC CAPITAL LETTER WE +051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA +0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK +0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK +0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER +0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER +0528; C; 0529; # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK +052A; C; 052B; # CYRILLIC CAPITAL LETTER DZZHE +052C; C; 052D; # CYRILLIC CAPITAL LETTER DCHE +052E; C; 052F; # CYRILLIC CAPITAL LETTER EL WITH DESCENDER +0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB +0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN +0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM +0534; C; 0564; # ARMENIAN CAPITAL LETTER DA +0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH +0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA +0537; C; 0567; # ARMENIAN CAPITAL LETTER EH +0538; C; 0568; # ARMENIAN CAPITAL LETTER ET +0539; C; 0569; # ARMENIAN CAPITAL LETTER TO +053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE +053B; C; 056B; # ARMENIAN CAPITAL LETTER INI +053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN +053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH +053E; C; 056E; # ARMENIAN CAPITAL LETTER CA +053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN +0540; C; 0570; # ARMENIAN CAPITAL LETTER HO +0541; C; 0571; # 
ARMENIAN CAPITAL LETTER JA +0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD +0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH +0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN +0545; C; 0575; # ARMENIAN CAPITAL LETTER YI +0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW +0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA +0548; C; 0578; # ARMENIAN CAPITAL LETTER VO +0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA +054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH +054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH +054C; C; 057C; # ARMENIAN CAPITAL LETTER RA +054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH +054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW +054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN +0550; C; 0580; # ARMENIAN CAPITAL LETTER REH +0551; C; 0581; # ARMENIAN CAPITAL LETTER CO +0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN +0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR +0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH +0555; C; 0585; # ARMENIAN CAPITAL LETTER OH +0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH +0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN +10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN +10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN +10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN +10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON +10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN +10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN +10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN +10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN +10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN +10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN +10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS +10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN +10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR +10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON +10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR +10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR +10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE +10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN +10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR +10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN +10B4; C; 2D14; # GEORGIAN 
CAPITAL LETTER PHAR +10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR +10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN +10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR +10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN +10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN +10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN +10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL +10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL +10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR +10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN +10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN +10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE +10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE +10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE +10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE +10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR +10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE +10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN +10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN +13F8; C; 13F0; # CHEROKEE SMALL LETTER YE +13F9; C; 13F1; # CHEROKEE SMALL LETTER YI +13FA; C; 13F2; # CHEROKEE SMALL LETTER YO +13FB; C; 13F3; # CHEROKEE SMALL LETTER YU +13FC; C; 13F4; # CHEROKEE SMALL LETTER YV +13FD; C; 13F5; # CHEROKEE SMALL LETTER MV +1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW +1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE +1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW +1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW +1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE +1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE +1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW +1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW +1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA +1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW +1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE +1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE +1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW +1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW +1E1C; C; 1E1D; # 
LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE +1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE +1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON +1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE +1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW +1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS +1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA +1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW +1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW +1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE +1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE +1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW +1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW +1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW +1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON +1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW +1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW +1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE +1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE +1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW +1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE +1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW +1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW +1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW +1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE +1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS +1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE +1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE +1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE +1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE +1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE +1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW +1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON +1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW +1E60; C; 1E61; # LATIN CAPITAL LETTER 
S WITH DOT ABOVE +1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW +1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE +1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE +1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE +1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW +1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW +1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW +1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW +1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW +1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW +1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE +1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS +1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE +1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW +1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE +1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE +1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS +1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE +1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW +1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE +1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS +1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE +1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX +1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW +1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW +1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW +1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS +1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE +1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE +1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING +1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE +1E9E; F; 0073 0073; # LATIN CAPITAL LETTER SHARP S +1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S 
+1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW +1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE +1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE +1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE +1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE +1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE +1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE +1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW +1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW +1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE +1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE +1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE +1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE +1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW +1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW +1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE +1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE +1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE +1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE +1EDE; C; 1EDF; # 
LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE +1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE +1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW +1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW +1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE +1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE +1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE +1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE +1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE +1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW +1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE +1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW +1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE +1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE +1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL +1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V +1EFE; C; 1EFF; # LATIN CAPITAL LETTER Y WITH LOOP +1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI +1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA +1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA +1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA +1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA +1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA +1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI +1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI +1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA +1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA +1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA +1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA +1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI +1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA +1F2A; C; 
1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA +1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA +1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA +1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA +1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI +1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI +1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI +1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA +1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA +1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA +1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA +1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA +1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI +1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI +1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI +1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA +1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA +1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA +1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA +1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI +1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA +1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA +1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI +1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI +1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA +1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA 
WITH PSILI AND VARIA +1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA +1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA +1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA +1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI +1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI +1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI +1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI +1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI +1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8D; S; 1F85; # GREEK 
CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI +1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI +1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND 
PROSGEGRAMMENI +1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI +1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI +1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI +1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI +1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAD; F; 1F65 
03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI +1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI +1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI +1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY +1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON +1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA +1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA +1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE; C; 03B9; # GREEK PROSGEGRAMMENI +1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI +1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI +1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI +1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA +1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA +1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA +1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA +1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA 
WITH PROSGEGRAMMENI +1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA +1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI +1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY +1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON +1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA +1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA +1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA +1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA +1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI +1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI +1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY +1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON +1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA +1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA +1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA +1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI +1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI +1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI +1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI +1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA +1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA +1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA +1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA +1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2126; C; 03C9; # OHM SIGN +212A; C; 006B; # KELVIN SIGN +212B; C; 
00E5; # ANGSTROM SIGN +2132; C; 214E; # TURNED CAPITAL F +2160; C; 2170; # ROMAN NUMERAL ONE +2161; C; 2171; # ROMAN NUMERAL TWO +2162; C; 2172; # ROMAN NUMERAL THREE +2163; C; 2173; # ROMAN NUMERAL FOUR +2164; C; 2174; # ROMAN NUMERAL FIVE +2165; C; 2175; # ROMAN NUMERAL SIX +2166; C; 2176; # ROMAN NUMERAL SEVEN +2167; C; 2177; # ROMAN NUMERAL EIGHT +2168; C; 2178; # ROMAN NUMERAL NINE +2169; C; 2179; # ROMAN NUMERAL TEN +216A; C; 217A; # ROMAN NUMERAL ELEVEN +216B; C; 217B; # ROMAN NUMERAL TWELVE +216C; C; 217C; # ROMAN NUMERAL FIFTY +216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED +216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED +216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND +2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED +24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A +24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B +24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C +24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D +24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E +24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F +24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G +24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H +24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I +24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J +24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K +24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L +24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M +24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N +24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O +24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P +24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q +24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R +24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S +24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T +24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U +24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V +24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W +24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X +24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y +24CF; C; 24E9; # CIRCLED LATIN CAPITAL 
LETTER Z +2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU +2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY +2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE +2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI +2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO +2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU +2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE +2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO +2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA +2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE +2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE +2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I +2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI +2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO +2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE +2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE +2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI +2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU +2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI +2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI +2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO +2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO +2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU +2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU +2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU +2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU +2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE +2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA +2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI +2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI +2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA +2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU +2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI +2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI +2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA +2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU +2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS +2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL +2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO +2C27; C; 2C57; # GLAGOLITIC 
CAPITAL LETTER IOTATED SMALL YUS +2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS +2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS +2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA +2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA +2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC +2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A +2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE +2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR +2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE +2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE +2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL +2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER +2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER +2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER +2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA +2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK +2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A +2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA +2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK +2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H +2C7E; C; 023F; # LATIN CAPITAL LETTER S WITH SWASH TAIL +2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL +2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA +2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA +2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA +2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA +2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE +2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU +2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA +2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE +2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE +2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA +2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA +2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA +2C98; C; 2C99; # COPTIC CAPITAL LETTER MI +2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI +2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI +2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O +2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI +2CA2; C; 2CA3; # COPTIC 
CAPITAL LETTER RO +2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA +2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU +2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA +2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI +2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI +2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI +2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU +2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF +2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN +2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE +2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA +2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI +2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI +2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU +2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI +2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI +2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI +2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH +2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI +2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI +2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI +2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA +2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA +2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI +2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT +2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA +2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA +2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA +2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA +2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI +2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI +2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU +2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI +2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI +A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA +A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO +A644; C; A645; # CYRILLIC 
CAPITAL LETTER REVERSED DZE +A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA +A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV +A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK +A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA +A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER +A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER +A652; C; A653; # CYRILLIC CAPITAL LETTER IOTIFIED YAT +A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU +A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A +A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS +A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS +A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS +A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN +A660; C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE +A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE +A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL +A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM +A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O +A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O +A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O +A680; C; A681; # CYRILLIC CAPITAL LETTER DWE +A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE +A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE +A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE +A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE +A68A; C; A68B; # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK +A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE +A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE +A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE +A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE +A694; C; A695; # CYRILLIC CAPITAL LETTER HWE +A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE +A698; C; A699; # CYRILLIC CAPITAL LETTER DOUBLE O +A69A; C; A69B; # CYRILLIC CAPITAL LETTER CROSSED O +A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF +A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN +A726; C; A727; # LATIN CAPITAL LETTER HENG +A728; C; A729; # LATIN CAPITAL LETTER TZ +A72A; 
C; A72B; # LATIN CAPITAL LETTER TRESILLO +A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO +A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA +A732; C; A733; # LATIN CAPITAL LETTER AA +A734; C; A735; # LATIN CAPITAL LETTER AO +A736; C; A737; # LATIN CAPITAL LETTER AU +A738; C; A739; # LATIN CAPITAL LETTER AV +A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR +A73C; C; A73D; # LATIN CAPITAL LETTER AY +A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT +A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE +A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE +A744; C; A745; # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE +A746; C; A747; # LATIN CAPITAL LETTER BROKEN L +A748; C; A749; # LATIN CAPITAL LETTER L WITH HIGH STROKE +A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY +A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP +A74E; C; A74F; # LATIN CAPITAL LETTER OO +A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER +A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH +A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL +A756; C; A757; # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER +A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE +A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA +A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA +A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE +A760; C; A761; # LATIN CAPITAL LETTER VY +A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z +A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE +A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER +A768; C; A769; # LATIN CAPITAL LETTER VEND +A76A; C; A76B; # LATIN CAPITAL LETTER ET +A76C; C; A76D; # LATIN CAPITAL LETTER IS +A76E; C; A76F; # LATIN CAPITAL LETTER CON +A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D +A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F +A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G +A77E; C; A77F; # LATIN CAPITAL LETTER 
TURNED INSULAR G +A780; C; A781; # LATIN CAPITAL LETTER TURNED L +A782; C; A783; # LATIN CAPITAL LETTER INSULAR R +A784; C; A785; # LATIN CAPITAL LETTER INSULAR S +A786; C; A787; # LATIN CAPITAL LETTER INSULAR T +A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO +A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H +A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER +A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR +A796; C; A797; # LATIN CAPITAL LETTER B WITH FLOURISH +A798; C; A799; # LATIN CAPITAL LETTER F WITH STROKE +A79A; C; A79B; # LATIN CAPITAL LETTER VOLAPUK AE +A79C; C; A79D; # LATIN CAPITAL LETTER VOLAPUK OE +A79E; C; A79F; # LATIN CAPITAL LETTER VOLAPUK UE +A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE +A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE +A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE +A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE +A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK +A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E +A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G +A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT +A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K +A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T +A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL +A7B3; C; AB53; # LATIN CAPITAL LETTER CHI +A7B4; C; A7B5; # LATIN CAPITAL LETTER BETA +A7B6; C; A7B7; # LATIN CAPITAL LETTER OMEGA +AB70; C; 13A0; # CHEROKEE SMALL LETTER A +AB71; C; 13A1; # CHEROKEE SMALL LETTER E +AB72; C; 13A2; # CHEROKEE SMALL LETTER I +AB73; C; 13A3; # CHEROKEE SMALL LETTER O +AB74; C; 13A4; # CHEROKEE SMALL LETTER U +AB75; C; 13A5; # CHEROKEE SMALL LETTER V +AB76; C; 13A6; # CHEROKEE SMALL LETTER GA +AB77; C; 13A7; # CHEROKEE SMALL LETTER KA +AB78; C; 13A8; # CHEROKEE SMALL LETTER GE +AB79; C; 13A9; # CHEROKEE SMALL LETTER GI +AB7A; C; 13AA; # CHEROKEE SMALL LETTER GO +AB7B; C; 13AB; # CHEROKEE SMALL LETTER GU +AB7C; C; 13AC; # 
CHEROKEE SMALL LETTER GV +AB7D; C; 13AD; # CHEROKEE SMALL LETTER HA +AB7E; C; 13AE; # CHEROKEE SMALL LETTER HE +AB7F; C; 13AF; # CHEROKEE SMALL LETTER HI +AB80; C; 13B0; # CHEROKEE SMALL LETTER HO +AB81; C; 13B1; # CHEROKEE SMALL LETTER HU +AB82; C; 13B2; # CHEROKEE SMALL LETTER HV +AB83; C; 13B3; # CHEROKEE SMALL LETTER LA +AB84; C; 13B4; # CHEROKEE SMALL LETTER LE +AB85; C; 13B5; # CHEROKEE SMALL LETTER LI +AB86; C; 13B6; # CHEROKEE SMALL LETTER LO +AB87; C; 13B7; # CHEROKEE SMALL LETTER LU +AB88; C; 13B8; # CHEROKEE SMALL LETTER LV +AB89; C; 13B9; # CHEROKEE SMALL LETTER MA +AB8A; C; 13BA; # CHEROKEE SMALL LETTER ME +AB8B; C; 13BB; # CHEROKEE SMALL LETTER MI +AB8C; C; 13BC; # CHEROKEE SMALL LETTER MO +AB8D; C; 13BD; # CHEROKEE SMALL LETTER MU +AB8E; C; 13BE; # CHEROKEE SMALL LETTER NA +AB8F; C; 13BF; # CHEROKEE SMALL LETTER HNA +AB90; C; 13C0; # CHEROKEE SMALL LETTER NAH +AB91; C; 13C1; # CHEROKEE SMALL LETTER NE +AB92; C; 13C2; # CHEROKEE SMALL LETTER NI +AB93; C; 13C3; # CHEROKEE SMALL LETTER NO +AB94; C; 13C4; # CHEROKEE SMALL LETTER NU +AB95; C; 13C5; # CHEROKEE SMALL LETTER NV +AB96; C; 13C6; # CHEROKEE SMALL LETTER QUA +AB97; C; 13C7; # CHEROKEE SMALL LETTER QUE +AB98; C; 13C8; # CHEROKEE SMALL LETTER QUI +AB99; C; 13C9; # CHEROKEE SMALL LETTER QUO +AB9A; C; 13CA; # CHEROKEE SMALL LETTER QUU +AB9B; C; 13CB; # CHEROKEE SMALL LETTER QUV +AB9C; C; 13CC; # CHEROKEE SMALL LETTER SA +AB9D; C; 13CD; # CHEROKEE SMALL LETTER S +AB9E; C; 13CE; # CHEROKEE SMALL LETTER SE +AB9F; C; 13CF; # CHEROKEE SMALL LETTER SI +ABA0; C; 13D0; # CHEROKEE SMALL LETTER SO +ABA1; C; 13D1; # CHEROKEE SMALL LETTER SU +ABA2; C; 13D2; # CHEROKEE SMALL LETTER SV +ABA3; C; 13D3; # CHEROKEE SMALL LETTER DA +ABA4; C; 13D4; # CHEROKEE SMALL LETTER TA +ABA5; C; 13D5; # CHEROKEE SMALL LETTER DE +ABA6; C; 13D6; # CHEROKEE SMALL LETTER TE +ABA7; C; 13D7; # CHEROKEE SMALL LETTER DI +ABA8; C; 13D8; # CHEROKEE SMALL LETTER TI +ABA9; C; 13D9; # CHEROKEE SMALL LETTER DO +ABAA; C; 13DA; # CHEROKEE SMALL 
LETTER DU +ABAB; C; 13DB; # CHEROKEE SMALL LETTER DV +ABAC; C; 13DC; # CHEROKEE SMALL LETTER DLA +ABAD; C; 13DD; # CHEROKEE SMALL LETTER TLA +ABAE; C; 13DE; # CHEROKEE SMALL LETTER TLE +ABAF; C; 13DF; # CHEROKEE SMALL LETTER TLI +ABB0; C; 13E0; # CHEROKEE SMALL LETTER TLO +ABB1; C; 13E1; # CHEROKEE SMALL LETTER TLU +ABB2; C; 13E2; # CHEROKEE SMALL LETTER TLV +ABB3; C; 13E3; # CHEROKEE SMALL LETTER TSA +ABB4; C; 13E4; # CHEROKEE SMALL LETTER TSE +ABB5; C; 13E5; # CHEROKEE SMALL LETTER TSI +ABB6; C; 13E6; # CHEROKEE SMALL LETTER TSO +ABB7; C; 13E7; # CHEROKEE SMALL LETTER TSU +ABB8; C; 13E8; # CHEROKEE SMALL LETTER TSV +ABB9; C; 13E9; # CHEROKEE SMALL LETTER WA +ABBA; C; 13EA; # CHEROKEE SMALL LETTER WE +ABBB; C; 13EB; # CHEROKEE SMALL LETTER WI +ABBC; C; 13EC; # CHEROKEE SMALL LETTER WO +ABBD; C; 13ED; # CHEROKEE SMALL LETTER WU +ABBE; C; 13EE; # CHEROKEE SMALL LETTER WV +ABBF; C; 13EF; # CHEROKEE SMALL LETTER YA +FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF +FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI +FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL +FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI +FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL +FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T +FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST +FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW +FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH +FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI +FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW +FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH +FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A +FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B +FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C +FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D +FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E +FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F +FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G +FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H +FF29; C; FF49; # FULLWIDTH LATIN CAPITAL 
LETTER I +FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J +FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K +FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L +FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M +FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N +FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O +FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P +FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q +FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R +FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S +FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T +FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U +FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V +FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W +FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X +FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y +FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z +10400; C; 10428; # DESERET CAPITAL LETTER LONG I +10401; C; 10429; # DESERET CAPITAL LETTER LONG E +10402; C; 1042A; # DESERET CAPITAL LETTER LONG A +10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH +10404; C; 1042C; # DESERET CAPITAL LETTER LONG O +10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO +10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I +10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E +10408; C; 10430; # DESERET CAPITAL LETTER SHORT A +10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH +1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O +1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO +1040C; C; 10434; # DESERET CAPITAL LETTER AY +1040D; C; 10435; # DESERET CAPITAL LETTER OW +1040E; C; 10436; # DESERET CAPITAL LETTER WU +1040F; C; 10437; # DESERET CAPITAL LETTER YEE +10410; C; 10438; # DESERET CAPITAL LETTER H +10411; C; 10439; # DESERET CAPITAL LETTER PEE +10412; C; 1043A; # DESERET CAPITAL LETTER BEE +10413; C; 1043B; # DESERET CAPITAL LETTER TEE +10414; C; 1043C; # DESERET CAPITAL LETTER DEE +10415; C; 1043D; # DESERET CAPITAL LETTER CHEE +10416; C; 1043E; # DESERET CAPITAL LETTER JEE 
+10417; C; 1043F; # DESERET CAPITAL LETTER KAY +10418; C; 10440; # DESERET CAPITAL LETTER GAY +10419; C; 10441; # DESERET CAPITAL LETTER EF +1041A; C; 10442; # DESERET CAPITAL LETTER VEE +1041B; C; 10443; # DESERET CAPITAL LETTER ETH +1041C; C; 10444; # DESERET CAPITAL LETTER THEE +1041D; C; 10445; # DESERET CAPITAL LETTER ES +1041E; C; 10446; # DESERET CAPITAL LETTER ZEE +1041F; C; 10447; # DESERET CAPITAL LETTER ESH +10420; C; 10448; # DESERET CAPITAL LETTER ZHEE +10421; C; 10449; # DESERET CAPITAL LETTER ER +10422; C; 1044A; # DESERET CAPITAL LETTER EL +10423; C; 1044B; # DESERET CAPITAL LETTER EM +10424; C; 1044C; # DESERET CAPITAL LETTER EN +10425; C; 1044D; # DESERET CAPITAL LETTER ENG +10426; C; 1044E; # DESERET CAPITAL LETTER OI +10427; C; 1044F; # DESERET CAPITAL LETTER EW +10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A +10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA +10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB +10C83; C; 10CC3; # OLD HUNGARIAN CAPITAL LETTER AMB +10C84; C; 10CC4; # OLD HUNGARIAN CAPITAL LETTER EC +10C85; C; 10CC5; # OLD HUNGARIAN CAPITAL LETTER ENC +10C86; C; 10CC6; # OLD HUNGARIAN CAPITAL LETTER ECS +10C87; C; 10CC7; # OLD HUNGARIAN CAPITAL LETTER ED +10C88; C; 10CC8; # OLD HUNGARIAN CAPITAL LETTER AND +10C89; C; 10CC9; # OLD HUNGARIAN CAPITAL LETTER E +10C8A; C; 10CCA; # OLD HUNGARIAN CAPITAL LETTER CLOSE E +10C8B; C; 10CCB; # OLD HUNGARIAN CAPITAL LETTER EE +10C8C; C; 10CCC; # OLD HUNGARIAN CAPITAL LETTER EF +10C8D; C; 10CCD; # OLD HUNGARIAN CAPITAL LETTER EG +10C8E; C; 10CCE; # OLD HUNGARIAN CAPITAL LETTER EGY +10C8F; C; 10CCF; # OLD HUNGARIAN CAPITAL LETTER EH +10C90; C; 10CD0; # OLD HUNGARIAN CAPITAL LETTER I +10C91; C; 10CD1; # OLD HUNGARIAN CAPITAL LETTER II +10C92; C; 10CD2; # OLD HUNGARIAN CAPITAL LETTER EJ +10C93; C; 10CD3; # OLD HUNGARIAN CAPITAL LETTER EK +10C94; C; 10CD4; # OLD HUNGARIAN CAPITAL LETTER AK +10C95; C; 10CD5; # OLD HUNGARIAN CAPITAL LETTER UNK +10C96; C; 10CD6; # OLD HUNGARIAN CAPITAL LETTER EL 
+10C97; C; 10CD7; # OLD HUNGARIAN CAPITAL LETTER ELY +10C98; C; 10CD8; # OLD HUNGARIAN CAPITAL LETTER EM +10C99; C; 10CD9; # OLD HUNGARIAN CAPITAL LETTER EN +10C9A; C; 10CDA; # OLD HUNGARIAN CAPITAL LETTER ENY +10C9B; C; 10CDB; # OLD HUNGARIAN CAPITAL LETTER O +10C9C; C; 10CDC; # OLD HUNGARIAN CAPITAL LETTER OO +10C9D; C; 10CDD; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE +10C9E; C; 10CDE; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE +10C9F; C; 10CDF; # OLD HUNGARIAN CAPITAL LETTER OEE +10CA0; C; 10CE0; # OLD HUNGARIAN CAPITAL LETTER EP +10CA1; C; 10CE1; # OLD HUNGARIAN CAPITAL LETTER EMP +10CA2; C; 10CE2; # OLD HUNGARIAN CAPITAL LETTER ER +10CA3; C; 10CE3; # OLD HUNGARIAN CAPITAL LETTER SHORT ER +10CA4; C; 10CE4; # OLD HUNGARIAN CAPITAL LETTER ES +10CA5; C; 10CE5; # OLD HUNGARIAN CAPITAL LETTER ESZ +10CA6; C; 10CE6; # OLD HUNGARIAN CAPITAL LETTER ET +10CA7; C; 10CE7; # OLD HUNGARIAN CAPITAL LETTER ENT +10CA8; C; 10CE8; # OLD HUNGARIAN CAPITAL LETTER ETY +10CA9; C; 10CE9; # OLD HUNGARIAN CAPITAL LETTER ECH +10CAA; C; 10CEA; # OLD HUNGARIAN CAPITAL LETTER U +10CAB; C; 10CEB; # OLD HUNGARIAN CAPITAL LETTER UU +10CAC; C; 10CEC; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE +10CAD; C; 10CED; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE +10CAE; C; 10CEE; # OLD HUNGARIAN CAPITAL LETTER EV +10CAF; C; 10CEF; # OLD HUNGARIAN CAPITAL LETTER EZ +10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS +10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN +10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US +118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA +118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A +118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI +118A3; C; 118C3; # WARANG CITI CAPITAL LETTER YU +118A4; C; 118C4; # WARANG CITI CAPITAL LETTER YA +118A5; C; 118C5; # WARANG CITI CAPITAL LETTER YO +118A6; C; 118C6; # WARANG CITI CAPITAL LETTER II +118A7; C; 118C7; # WARANG CITI CAPITAL LETTER UU +118A8; C; 118C8; # WARANG CITI CAPITAL LETTER E +118A9; C; 118C9; # 
WARANG CITI CAPITAL LETTER O +118AA; C; 118CA; # WARANG CITI CAPITAL LETTER ANG +118AB; C; 118CB; # WARANG CITI CAPITAL LETTER GA +118AC; C; 118CC; # WARANG CITI CAPITAL LETTER KO +118AD; C; 118CD; # WARANG CITI CAPITAL LETTER ENY +118AE; C; 118CE; # WARANG CITI CAPITAL LETTER YUJ +118AF; C; 118CF; # WARANG CITI CAPITAL LETTER UC +118B0; C; 118D0; # WARANG CITI CAPITAL LETTER ENN +118B1; C; 118D1; # WARANG CITI CAPITAL LETTER ODD +118B2; C; 118D2; # WARANG CITI CAPITAL LETTER TTE +118B3; C; 118D3; # WARANG CITI CAPITAL LETTER NUNG +118B4; C; 118D4; # WARANG CITI CAPITAL LETTER DA +118B5; C; 118D5; # WARANG CITI CAPITAL LETTER AT +118B6; C; 118D6; # WARANG CITI CAPITAL LETTER AM +118B7; C; 118D7; # WARANG CITI CAPITAL LETTER BU +118B8; C; 118D8; # WARANG CITI CAPITAL LETTER PU +118B9; C; 118D9; # WARANG CITI CAPITAL LETTER HIYO +118BA; C; 118DA; # WARANG CITI CAPITAL LETTER HOLO +118BB; C; 118DB; # WARANG CITI CAPITAL LETTER HORR +118BC; C; 118DC; # WARANG CITI CAPITAL LETTER HAR +118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU +118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII +118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO +# +# EOF diff --git a/js/src/vm/RegExpObject.cpp b/js/src/vm/RegExpObject.cpp index 1faf252e7..f6973edeb 100644 --- a/js/src/vm/RegExpObject.cpp +++ b/js/src/vm/RegExpObject.cpp @@ -519,7 +519,7 @@ RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString inpu /* Parse the pattern. 
*/ irregexp::RegExpCompileData data; if (!irregexp::ParsePattern(dummyTokenStream, cx->tempLifoAlloc(), pattern, - multiline(), mode == MatchOnly, unicode(), &data)) + multiline(), mode == MatchOnly, unicode(), ignoreCase(), &data)) { return false; } @@ -532,7 +532,7 @@ RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString inpu input->hasLatin1Chars(), mode == MatchOnly, force == ForceByteCode, - sticky()); + sticky(), unicode()); if (code.empty()) return false; diff --git a/js/src/vm/Unicode.cpp b/js/src/vm/Unicode.cpp index b110fbc5d..9214450ac 100644 --- a/js/src/vm/Unicode.cpp +++ b/js/src/vm/Unicode.cpp @@ -772,4 +772,439 @@ const uint8_t unicode::index2[] = { 5, 5, 5, 0, 0, 0, }; +const FoldingInfo unicode::js_foldinfo[] = { + {0, 0, 0, 0}, + {32, 0, 0, 0}, + {32, 8415, 0, 0}, + {32, 300, 0, 0}, + {0, 65504, 0, 0}, + {0, 65504, 8383, 0}, + {0, 65504, 268, 0}, + {775, 743, 0, 0}, + {32, 8294, 0, 0}, + {0, 7615, 0, 0}, + {0, 65504, 8262, 0}, + {0, 121, 0, 0}, + {1, 0, 0, 0}, + {0, 65535, 0, 0}, + {65415, 0, 0, 0}, + {65268, 65236, 0, 0}, + {0, 195, 0, 0}, + {210, 0, 0, 0}, + {206, 0, 0, 0}, + {205, 0, 0, 0}, + {79, 0, 0, 0}, + {202, 0, 0, 0}, + {203, 0, 0, 0}, + {207, 0, 0, 0}, + {0, 97, 0, 0}, + {211, 0, 0, 0}, + {209, 0, 0, 0}, + {0, 163, 0, 0}, + {213, 0, 0, 0}, + {0, 130, 0, 0}, + {214, 0, 0, 0}, + {218, 0, 0, 0}, + {217, 0, 0, 0}, + {219, 0, 0, 0}, + {0, 56, 0, 0}, + {2, 1, 0, 0}, + {1, 65535, 0, 0}, + {0, 65534, 65535, 0}, + {0, 65457, 0, 0}, + {65439, 0, 0, 0}, + {65480, 0, 0, 0}, + {65406, 0, 0, 0}, + {10795, 0, 0, 0}, + {65373, 0, 0, 0}, + {10792, 0, 0, 0}, + {0, 10815, 0, 0}, + {65341, 0, 0, 0}, + {69, 0, 0, 0}, + {71, 0, 0, 0}, + {0, 10783, 0, 0}, + {0, 10780, 0, 0}, + {0, 10782, 0, 0}, + {0, 65326, 0, 0}, + {0, 65330, 0, 0}, + {0, 65331, 0, 0}, + {0, 65334, 0, 0}, + {0, 65333, 0, 0}, + {0, 42319, 0, 0}, + {0, 42315, 0, 0}, + {0, 65329, 0, 0}, + {0, 42280, 0, 0}, + {0, 42308, 0, 0}, + {0, 65327, 0, 0}, + {0, 65325, 0, 0}, + {0, 
10743, 0, 0}, + {0, 42305, 0, 0}, + {0, 10749, 0, 0}, + {0, 65323, 0, 0}, + {0, 65322, 0, 0}, + {0, 10727, 0, 0}, + {0, 65318, 0, 0}, + {0, 42282, 0, 0}, + {0, 65467, 0, 0}, + {0, 65319, 0, 0}, + {0, 65465, 0, 0}, + {0, 65317, 0, 0}, + {0, 42261, 0, 0}, + {0, 42258, 0, 0}, + {116, 84, 7289, 0}, + {116, 0, 0, 0}, + {38, 0, 0, 0}, + {37, 0, 0, 0}, + {64, 0, 0, 0}, + {63, 0, 0, 0}, + {32, 62, 0, 0}, + {32, 96, 0, 0}, + {32, 57, 92, 0}, + {32, 65452, 7205, 0}, + {32, 86, 0, 0}, + {32, 64793, 0, 0}, + {32, 54, 0, 0}, + {32, 80, 0, 0}, + {32, 31, 0, 0}, + {32, 47, 0, 0}, + {32, 7549, 0, 0}, + {0, 65498, 0, 0}, + {0, 65499, 0, 0}, + {0, 65504, 30, 0}, + {0, 65504, 64, 0}, + {0, 65504, 25, 60}, + {0, 65420, 65504, 7173}, + {0, 65504, 54, 0}, + {0, 64761, 65504, 0}, + {0, 65504, 22, 0}, + {0, 65504, 48, 0}, + {1, 65505, 0, 0}, + {0, 65504, 65535, 0}, + {0, 65504, 15, 0}, + {0, 65504, 7517, 0}, + {0, 65472, 0, 0}, + {0, 65473, 0, 0}, + {8, 0, 0, 0}, + {65506, 65474, 0, 0}, + {65511, 65479, 35, 0}, + {65521, 65489, 0, 0}, + {65514, 65482, 0, 0}, + {0, 65528, 0, 0}, + {65482, 65450, 0, 0}, + {65488, 65456, 0, 0}, + {0, 7, 0, 0}, + {0, 65420, 0, 0}, + {65476, 65444, 65501, 0}, + {65472, 65440, 0, 0}, + {65529, 0, 0, 0}, + {80, 0, 0, 0}, + {0, 65456, 0, 0}, + {15, 0, 0, 0}, + {0, 65521, 0, 0}, + {48, 0, 0, 0}, + {0, 65488, 0, 0}, + {7264, 0, 0, 0}, + {0, 38864, 0, 0}, + {0, 8, 0, 0}, + {65528, 0, 0, 0}, + {0, 35332, 0, 0}, + {0, 3814, 0, 0}, + {1, 59, 0, 0}, + {0, 65535, 58, 0}, + {65478, 65477, 0, 0}, + {57921, 0, 0, 0}, + {0, 74, 0, 0}, + {0, 86, 0, 0}, + {0, 100, 0, 0}, + {0, 128, 0, 0}, + {0, 112, 0, 0}, + {0, 126, 0, 0}, + {0, 9, 0, 0}, + {65462, 0, 0, 0}, + {65527, 0, 0, 0}, + {58363, 58247, 58331, 0}, + {65450, 0, 0, 0}, + {65436, 0, 0, 0}, + {65424, 0, 0, 0}, + {65408, 0, 0, 0}, + {65410, 0, 0, 0}, + {58019, 57987, 0, 0}, + {57153, 57121, 0, 0}, + {57274, 57242, 0, 0}, + {28, 0, 0, 0}, + {0, 65508, 0, 0}, + {16, 0, 0, 0}, + {0, 65520, 0, 0}, + {26, 0, 0, 0}, + {0, 65510, 
0, 0}, + {54793, 0, 0, 0}, + {61722, 0, 0, 0}, + {54809, 0, 0, 0}, + {0, 54741, 0, 0}, + {0, 54744, 0, 0}, + {54756, 0, 0, 0}, + {54787, 0, 0, 0}, + {54753, 0, 0, 0}, + {54754, 0, 0, 0}, + {54721, 0, 0, 0}, + {0, 58272, 0, 0}, + {30204, 0, 0, 0}, + {23256, 0, 0, 0}, + {23228, 0, 0, 0}, + {23217, 0, 0, 0}, + {23221, 0, 0, 0}, + {23231, 0, 0, 0}, + {23278, 0, 0, 0}, + {23254, 0, 0, 0}, + {23275, 0, 0, 0}, + {928, 0, 0, 0}, + {0, 64608, 0, 0}, + {26672, 0, 0, 0}, +}; + +const uint8_t unicode::folding_index1[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 22, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 23, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 0, 0, 26, 27, 28, 26, 29, 30, + 31, 32, 0, 0, 0, 0, 33, 34, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 36, 37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 39, 26, 40, + 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, + 43, 0, 44, 45, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 47, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 50, 0, 0, +}; + +const uint8_t unicode::folding_index2[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, + 1, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, + 4, 4, 4, 4, 4, 4, 4, 6, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 8, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 9, 4, 4, 4, 4, 4, 10, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, + 4, 4, 4, 11, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 0, 0, + 12, 13, 12, 13, 12, 13, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, + 13, 12, 13, 12, 13, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 14, 12, + 13, 12, 13, 12, 13, 15, 16, 17, 12, 13, 12, 13, 18, 12, 13, 19, 19, 12, + 13, 0, 20, 21, 22, 12, 13, 19, 23, 24, 25, 26, 12, 13, 27, 0, 25, 28, + 29, 30, 12, 13, 12, 13, 12, 13, 31, 12, 13, 31, 0, 0, 12, 13, 31, 12, + 13, 32, 32, 12, 13, 12, 13, 33, 12, 13, 0, 0, 12, 13, 0, 34, 0, 0, + 0, 0, 35, 36, 37, 35, 36, 37, 35, 36, 37, 12, 13, 12, 13, 12, 13, 12, + 13, 12, 13, 12, 13, 12, 13, 12, 13, 38, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 0, 35, 36, 37, 12, 13, 39, 40, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 41, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 0, 0, 0, 0, 0, 0, 42, 12, 13, 43, 44, 45, + 45, 12, 13, 46, 47, 48, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 49, 50, + 51, 52, 53, 0, 54, 54, 0, 55, 0, 
56, 57, 0, 0, 0, 54, 58, 0, 59, + 0, 60, 61, 0, 62, 63, 0, 64, 65, 0, 0, 63, 0, 66, 67, 0, 0, 68, + 0, 0, 0, 0, 0, 0, 0, 69, 0, 0, 70, 0, 0, 70, 0, 0, 0, 71, + 70, 72, 73, 73, 74, 0, 0, 0, 0, 0, 75, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 76, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 12, 13, + 0, 0, 12, 13, 0, 0, 0, 29, 29, 29, 0, 79, 0, 0, 0, 0, 0, 0, + 80, 0, 81, 81, 81, 0, 82, 0, 83, 83, 0, 1, 84, 1, 1, 85, 1, 1, + 86, 87, 88, 1, 89, 1, 1, 1, 90, 91, 0, 92, 1, 1, 93, 1, 1, 94, + 1, 1, 95, 96, 96, 96, 0, 4, 97, 4, 4, 98, 4, 4, 99, 100, 101, 4, + 102, 4, 4, 4, 103, 104, 105, 106, 4, 4, 107, 4, 4, 108, 4, 4, 109, 110, + 110, 111, 112, 113, 0, 0, 0, 114, 115, 116, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 117, 118, + 119, 120, 121, 122, 0, 12, 13, 123, 12, 13, 0, 41, 41, 41, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, + 125, 125, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 126, 12, 13, 12, 13, 12, 13, 12, 13, 12, + 13, 12, 13, 12, 13, 127, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 
12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, + 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, + 129, 129, 129, 129, 129, 129, 129, 129, 129, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 0, 130, 0, 0, 0, 0, 0, 130, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, + 132, 132, 132, 132, 132, 132, 0, 0, 133, 133, 133, 133, 133, 133, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 134, 0, 0, 0, 135, 0, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 136, 137, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 0, 0, 0, 0, + 0, 138, 0, 0, 139, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 132, 132, 132, 132, 132, 132, 132, 132, 133, 133, 133, 133, 133, 133, 133, 133, + 132, 132, 132, 132, 132, 132, 0, 0, 133, 133, 133, 133, 133, 133, 0, 0, 132, 132, + 132, 132, 132, 132, 132, 132, 133, 133, 133, 133, 133, 133, 133, 133, 132, 132, 132, 132, + 132, 132, 132, 132, 133, 133, 133, 133, 133, 133, 133, 133, 132, 132, 132, 132, 132, 132, + 0, 0, 133, 133, 133, 133, 133, 133, 0, 0, 0, 132, 0, 132, 0, 132, 0, 132, + 0, 133, 0, 133, 0, 133, 0, 133, 132, 132, 132, 132, 132, 132, 132, 132, 133, 133, + 133, 133, 133, 133, 133, 133, 140, 140, 141, 141, 141, 141, 142, 142, 143, 143, 144, 144, + 145, 145, 0, 0, 132, 132, 132, 132, 132, 132, 132, 132, 133, 133, 133, 133, 133, 133, + 133, 133, 132, 132, 132, 132, 132, 132, 132, 132, 133, 133, 133, 133, 133, 133, 133, 133, + 132, 132, 132, 132, 132, 132, 132, 132, 133, 133, 133, 133, 133, 133, 133, 133, 132, 132, + 0, 146, 0, 0, 0, 0, 133, 133, 147, 147, 148, 0, 149, 0, 0, 0, 0, 146, + 0, 0, 0, 0, 150, 150, 150, 150, 148, 0, 0, 0, 132, 132, 0, 0, 0, 0, + 0, 0, 133, 133, 151, 151, 0, 0, 0, 0, 132, 132, 0, 0, 0, 119, 
0, 0, + 133, 133, 152, 152, 123, 0, 0, 0, 0, 0, 0, 146, 0, 0, 0, 0, 153, 153, + 154, 154, 148, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 155, 0, 0, 0, 156, 157, 0, 0, 0, 0, + 0, 0, 158, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 161, 161, 161, 161, 161, 161, 161, 161, + 161, 161, 161, 161, 161, 161, 161, 161, 0, 0, 0, 12, 13, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, + 162, 162, 162, 162, 162, 162, 162, 162, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, + 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 0, 129, 129, 129, 129, + 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, + 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, + 129, 129, 129, 129, 129, 129, 129, 0, 12, 13, 164, 165, 166, 167, 168, 12, 13, 12, + 13, 12, 13, 169, 170, 171, 172, 0, 12, 13, 0, 12, 13, 0, 0, 0, 0, 0, + 0, 0, 173, 173, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 
12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 0, 0, 0, 0, 0, 0, 0, 12, 13, 12, 13, 0, 0, 0, + 12, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 174, 174, 174, 174, + 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, + 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 0, 174, + 0, 0, 0, 0, 0, 174, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 0, 0, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 12, 13, 12, 13, 175, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, + 0, 0, 0, 12, 13, 176, 0, 0, 12, 13, 12, 13, 0, 0, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 177, 178, + 179, 180, 0, 0, 181, 182, 183, 184, 12, 13, 12, 13, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 185, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 186, 186, 186, 186, + 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, + 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, + 186, 186, 
186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, + 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, + 186, 186, 186, 186, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, +}; + diff --git a/js/src/vm/Unicode.h b/js/src/vm/Unicode.h index 1276e915a..3ab452c2a 100644 --- a/js/src/vm/Unicode.h +++ b/js/src/vm/Unicode.h @@ -234,6 +234,55 @@ CanLowerCase(char16_t ch) return CharInfo(ch).lowerCase != 0; } +class FoldingInfo { + public: + uint16_t folding; + uint16_t reverse1; + uint16_t reverse2; + uint16_t reverse3; +}; + +extern const uint8_t folding_index1[]; +extern const uint8_t folding_index2[]; +extern const FoldingInfo js_foldinfo[]; + +inline const FoldingInfo& +CaseFoldInfo(char16_t code) +{ + const size_t shift = 6; + size_t index = folding_index1[code >> shift]; + index = folding_index2[(index << shift) + (code & ((1 << shift) - 1))]; + return js_foldinfo[index]; +} + +inline char16_t +FoldCase(char16_t ch) +{ + const FoldingInfo& info = CaseFoldInfo(ch); + return uint16_t(ch) + info.folding; +} + +inline char16_t +ReverseFoldCase1(char16_t ch) +{ + const FoldingInfo& info = CaseFoldInfo(ch); + return uint16_t(ch) + info.reverse1; +} + +inline char16_t +ReverseFoldCase2(char16_t ch) +{ + const FoldingInfo& info = CaseFoldInfo(ch); + return uint16_t(ch) + info.reverse2; +} + +inline char16_t +ReverseFoldCase3(char16_t ch) +{ + const FoldingInfo& info = CaseFoldInfo(ch); + return uint16_t(ch) + info.reverse3; +} + const size_t LeadSurrogateMin = 0xD800; const size_t LeadSurrogateMax = 0xDBFF; const size_t TrailSurrogateMin = 0xDC00; diff --git 
a/js/src/vm/make_unicode.py b/js/src/vm/make_unicode.py index 16e521da9..19c0aab58 100644 --- a/js/src/vm/make_unicode.py +++ b/js/src/vm/make_unicode.py @@ -84,13 +84,33 @@ def read_unicode_data(unicode_file): row[0] = int(row[0], 16) yield row -def generate_unicode_stuff(unicode_data, data_file, test_mapping, test_space): +def read_case_folding(case_folding): + for line in case_folding: + if line == '\n' or line.startswith('#'): + continue + row = line.split('; ') + if row[1] in ['F', 'T']: + continue + row[0] = int(row[0], 16) + row[2] = int(row[2], 16) + yield row + +def generate_unicode_stuff(unicode_data, case_folding, + data_file, test_mapping, test_space, test_icase): dummy = (0, 0, 0) table = [dummy] cache = {dummy: 0} index = [0] * (MAX + 1) + folding_map = {} + rev_folding_map = {} + folding_dummy = (0, 0, 0, 0) + folding_table = [folding_dummy] + folding_cache = {folding_dummy: 0} + folding_index = [0] * (MAX + 1) test_table = {} test_space_table = [] + folding_tests = [] + folding_codes = set() for row in read_unicode_data(unicode_data): code = row[0] @@ -143,6 +163,64 @@ def generate_unicode_stuff(unicode_data, data_file, test_mapping, test_space): table.append(item) index[code] = i + for row in read_case_folding(case_folding): + code = row[0] + mapping = row[2] + folding_map[code] = mapping + + if mapping not in rev_folding_map: + rev_folding_map[mapping] = [code] + else: + rev_folding_map[mapping].append(code) + + folding_codes.add(code) + folding_codes.add(mapping) + + for code in sorted(folding_codes): + if code > MAX: + continue + + if code in folding_map: + folding = folding_map[code] + else: + folding = code + + if code in rev_folding_map: + rev_folding = rev_folding_map[code] + elif folding in rev_folding_map: + rev_folding = [c for c in rev_folding_map[folding] if c != code] + else: + rev_folding = [] + + assert len(rev_folding) <= 3 + + if folding != code or len(rev_folding): + item = [code] + if folding != code: + item.append(folding) + 
folding_tests.append(item + rev_folding) + + folding_d = folding - code + rev_folding_ds = [v - code for v in rev_folding] + + assert folding_d > -65535 and folding_d < 65535 + assert all([v > -65535 and v < 65535 for v in rev_folding]) + + folding = folding_d & 0xffff + rev_folding = [v & 0xffff for v in rev_folding_ds] + rev_folding_0 = rev_folding[0] if len(rev_folding) >= 1 else 0 + rev_folding_1 = rev_folding[1] if len(rev_folding) >= 2 else 0 + rev_folding_2 = rev_folding[2] if len(rev_folding) >= 3 else 0 + + item = (folding, rev_folding_0, rev_folding_1, rev_folding_2) + + i = folding_cache.get(item) + if i is None: + assert item not in folding_table + folding_cache[item] = i = len(folding_table) + folding_table.append(item) + folding_index[code] = i + test_mapping.write('/* Generated by make_unicode.py DO NOT MODIFY */\n') test_mapping.write(public_domain) test_mapping.write('var mapping = [\n') @@ -180,6 +258,29 @@ assertEq((onlySpace + 'aaaa').trim(), 'aaaa'); assertEq(('aaaa' + onlySpace).trim(), 'aaaa'); assertEq((onlySpace + 'aaaa' + onlySpace).trim(), 'aaaa'); +if (typeof reportCompare === "function") + reportCompare(true, true); +""") + + test_icase.write('/* Generated by make_unicode.py DO NOT MODIFY */\n') + test_icase.write(public_domain) + test_icase.write(""" +var BUGNUMBER = 1135377; +var summary = "Implement RegExp unicode flag -- ignoreCase flag."; + +print(BUGNUMBER + ": " + summary); + +function test(code, ...equivs) { + var codeRe = new RegExp(String.fromCodePoint(code) + "+", "iu"); + var ans = String.fromCodePoint(code) + equivs.map(c => String.fromCodePoint(c)).join(""); + assertEqArray(codeRe.exec("<" + ans + ">"), [ans]); + codeRe = new RegExp("[" + String.fromCodePoint(code) + "]+", "iu"); + assertEqArray(codeRe.exec("<" + ans + ">"), [ans]); +} +""") + for args in folding_tests: + test_icase.write('test(' + ','.join([hex(c) for c in args]) + ');\n') + test_icase.write(""" if (typeof reportCompare === "function") reportCompare(true, 
true); """) @@ -189,6 +290,11 @@ if (typeof reportCompare === "function") # Don't forget to update CharInfo in Unicode.cpp if you need to change this assert shift == 5 + folding_index1, folding_index2, folding_shift = splitbins(folding_index) + + # Don't forget to update CharInfo in Unicode.cpp if you need to change this + assert folding_shift == 6 + # verify correctness for char in index: test = table[index[char]] @@ -198,6 +304,14 @@ if (typeof reportCompare === "function") assert test == table[idx] + # verify correctness + for char in folding_index: + test = folding_table[folding_index[char]] + + idx = folding_index1[char >> folding_shift] + idx = folding_index2[(idx << folding_shift) + (char & ((1 << folding_shift) - 1))] + + assert test == folding_table[idx] comment = """ /* @@ -284,6 +398,19 @@ if (typeof reportCompare === "function") dump(index2, 'index2', data_file) data_file.write('\n') + data_file.write('const FoldingInfo unicode::js_foldinfo[] = {\n') + for d in folding_table: + data_file.write(' {') + data_file.write(', '.join((str(e) for e in d))) + data_file.write('},\n') + data_file.write('};\n') + data_file.write('\n') + + dump(folding_index1, 'folding_index1', data_file) + data_file.write('\n') + dump(folding_index2, 'folding_index2', data_file) + data_file.write('\n') + data_file.write('\n') def getsize(data): @@ -362,7 +489,7 @@ if __name__ == '__main__': print('Always make sure you have the newest UnicodeData.txt!') unicode_data = open(sys.argv[1], 'r') else: - print('Downloading...') + print('Downloading UnicodeData.txt...') reader = urllib2.urlopen('http://unicode.org/Public/UNIDATA/UnicodeData.txt') data = reader.read() reader.close() @@ -370,8 +497,21 @@ if __name__ == '__main__': unicode_data.write(data) unicode_data.seek(0) + if len(sys.argv) > 2: + print('Always make sure you have the newest CaseFolding.txt!') + case_folding = open(sys.argv[2], 'r') + else: + print('Downloading CaseFolding.txt...') + reader = 
urllib2.urlopen('http://unicode.org/Public/UNIDATA/CaseFolding.txt') + data = reader.read() + reader.close() + case_folding = open('CaseFolding.txt', 'w+') + case_folding.write(data) + case_folding.seek(0) + print('Generating...') - generate_unicode_stuff(unicode_data, + generate_unicode_stuff(unicode_data, case_folding, open('Unicode.cpp', 'w'), open('../tests/ecma_5/String/string-upper-lower-mapping.js', 'w'), - open('../tests/ecma_5/String/string-space-trim.js', 'w')) + open('../tests/ecma_5/String/string-space-trim.js', 'w'), + open('../tests/ecma_6/RegExp/unicode-ignoreCase.js', 'w'))