From 0c70c7c824a9f4e15acdfa4a719aa19729ab87a4 Mon Sep 17 00:00:00 2001 From: Cameron Kaiser Date: Mon, 12 Mar 2018 20:10:32 -0700 Subject: [PATCH] #482: better bidi M1358275 M1392181 M1428774 + backbugs --- dom/base/nsTextFragment.cpp | 58 ++++++-------- dom/base/nsTextFragment.h | 4 + intl/unicharutil/util/nsBidiUtils.cpp | 24 +++--- intl/unicharutil/util/nsBidiUtils.h | 46 ++++++++++- layout/base/nsBidiPresUtils.cpp | 107 +++++++++++++++++++++++++- layout/base/nsBidiPresUtils.h | 17 ++++ 6 files changed, 210 insertions(+), 46 deletions(-) diff --git a/dom/base/nsTextFragment.cpp b/dom/base/nsTextFragment.cpp index 52ccd7a59..0b8e2f045 100644 --- a/dom/base/nsTextFragment.cpp +++ b/dom/base/nsTextFragment.cpp @@ -345,22 +345,22 @@ nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBi // Should we optimize for aData.Length() == 0? - CheckedUint32 length = mState.mLength; - length += aLength; - - if (!length.isValid()) { - return false; + // FYI: Don't use CheckedInt in this method since here is very hot path + // in some performance tests. + if (MOZ_UNLIKELY(NS_MAX_TEXT_FRAGMENT_LENGTH - mState.mLength < aLength)) { + return false; // Would be overflown if we'd keep handling. } if (mState.mIs2b) { - length *= sizeof(char16_t); - if (!length.isValid()) { - return false; + size_t size = mState.mLength + aLength; + if (MOZ_UNLIKELY(SIZE_MAX / sizeof(char16_t) < size)) { + return false; // Would be overflown if we'd keep handling. } + size *= sizeof(char16_t); // Already a 2-byte string so the result will be too - char16_t* buff = static_cast(realloc(m2b, length.value())); - if (!buff) { + char16_t* buff = static_cast(realloc(m2b, size)); + if (MOZ_UNLIKELY(!buff)) { return false; } @@ -379,15 +379,16 @@ nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBi int32_t first16bit = FirstNon8Bit(aBuffer, aBuffer + aLength); if (first16bit != -1) { // aBuffer contains no non-8bit character - length *= sizeof(char16_t); - if (!length.isValid()) { - return false; + size_t size = mState.mLength + aLength; + if (MOZ_UNLIKELY((SIZE_MAX / sizeof(char16_t)) < size)) { + return false; // Would be overflown if we'd keep handling. } + size *= sizeof(char16_t); // The old data was 1-byte, but the new is not so we have to expand it // all to 2-byte - char16_t* buff = static_cast(malloc(length.value())); - if (!buff) { + char16_t* buff = static_cast(malloc(size)); + if (MOZ_UNLIKELY(!buff)) { return false; } @@ -414,16 +415,18 @@ nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBi } // The new and the old data is all 1-byte + size_t size = mState.mLength + aLength; + MOZ_ASSERT(sizeof(char) == 1); char* buff; if (mState.mInHeap) { - buff = static_cast(realloc(const_cast(m1b), length.value())); - if (!buff) { + buff = static_cast(realloc(const_cast(m1b), size)); + if (MOZ_UNLIKELY(!buff)) { return false; } } else { - buff = static_cast(malloc(length.value())); - if (!buff) { + buff = static_cast(malloc(size)); + if (MOZ_UNLIKELY(!buff)) { return false; } @@ -461,21 +464,8 @@ void nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength) { if (mState.mIs2b && !mState.mIsBidi) { - const char16_t* cp = aBuffer; - const char16_t* end = cp + aLength; - while (cp < end) { - char16_t ch1 = *cp++; - uint32_t utf32Char = ch1; - if (NS_IS_HIGH_SURROGATE(ch1) && - cp < end && - NS_IS_LOW_SURROGATE(*cp)) { - char16_t ch2 = *cp++; - utf32Char = SURROGATE_TO_UCS4(ch1, ch2); - } - if (UTF32_CHAR_IS_BIDI(utf32Char) || IsBidiControl(utf32Char)) { - mState.mIsBidi = true; - break; - } + if (HasRTLChars(aBuffer, aLength)) { + mState.mIsBidi = true; } } } diff --git a/dom/base/nsTextFragment.h b/dom/base/nsTextFragment.h index d9a0aa839..25ed79a7d 100644 --- a/dom/base/nsTextFragment.h +++ b/dom/base/nsTextFragment.h @@ -213,9 +213,13 @@ public: uint32_t mInHeap : 1; uint32_t mIs2b : 1; uint32_t mIsBidi : 1; + // Note that when you change the bits of mLength, you also need to change + // NS_MAX_TEXT_FRAGMENT_LENGTH. uint32_t mLength : 29; }; +#define NS_MAX_TEXT_FRAGMENT_LENGTH (static_cast(0x1FFFFFFF)) + size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const; private: diff --git a/intl/unicharutil/util/nsBidiUtils.cpp b/intl/unicharutil/util/nsBidiUtils.cpp index 8c186ac68..db7ed41fd 100644 --- a/intl/unicharutil/util/nsBidiUtils.cpp +++ b/intl/unicharutil/util/nsBidiUtils.cpp @@ -5,6 +5,10 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "nsBidiUtils.h" +namespace mozilla { +static const uint32_t kMinRTLChar = 0x0590; +} // namespace mozilla; + #define ARABIC_TO_HINDI_DIGIT_INCREMENT (START_HINDI_DIGITS - START_ARABIC_DIGITS) #define PERSIAN_TO_HINDI_DIGIT_INCREMENT (START_HINDI_DIGITS - START_FARSI_DIGITS) #define ARABIC_TO_PERSIAN_DIGIT_INCREMENT (START_FARSI_DIGITS - START_ARABIC_DIGITS) @@ -82,16 +86,18 @@ nsresult HandleNumbers(char16_t* aBuffer, uint32_t aSize, uint32_t aNumFlag) return NS_OK; } -bool HasRTLChars(const nsAString& aString) +bool HasRTLChars(const char16_t* aText, uint32_t aLength) { -// This is used to determine whether to enable bidi if a string has -// right-to-left characters. To simplify things, anything that could be a -// surrogate or RTL presentation form is covered just by testing >= 0xD800). -// It's fine to enable bidi in rare cases where it actually isn't needed. - int32_t length = aString.Length(); - for (int32_t i = 0; i < length; i++) { - char16_t ch = aString.CharAt(i); - if (ch >= 0xD800 || IS_IN_BMP_RTL_BLOCK(ch)) { + // This is used to determine whether a string has right-to-left characters + // that mean it will require bidi processing. + const char16_t* cp = aText; + const char16_t* end = cp + aLength; + while (cp < end) { + char16_t ch = *cp++; + if (ch < mozilla::kMinRTLChar) { + continue; + } + if (UTF16_CODE_UNIT_IS_BIDI(ch) || IsBidiControlRTL(ch)) { return true; } } diff --git a/intl/unicharutil/util/nsBidiUtils.h b/intl/unicharutil/util/nsBidiUtils.h index ce2ac307b..30c13ac10 100644 --- a/intl/unicharutil/util/nsBidiUtils.h +++ b/intl/unicharutil/util/nsBidiUtils.h @@ -110,10 +110,19 @@ typedef enum nsCharType nsCharType; * Return false, otherwise */ #define LRM_CHAR 0x200e +#define RLM_CHAR 0x200f + #define LRE_CHAR 0x202a +#define RLE_CHAR 0x202b +#define PDF_CHAR 0x202c +#define LRO_CHAR 0x202d #define RLO_CHAR 0x202e + #define LRI_CHAR 0x2066 +#define RLI_CHAR 0x2067 +#define FSI_CHAR 0x2068 #define PDI_CHAR 0x2069 + #define ALM_CHAR 0x061C inline bool IsBidiControl(uint32_t aChar) { return ((LRE_CHAR <= aChar && aChar <= RLO_CHAR) || @@ -123,10 +132,32 @@ typedef enum nsCharType nsCharType; } /** - * Give an nsString. + * Give a UTF-16 codepoint (changed for TenFourFox; we don't use this for + * UTF-32 characters, so the conversion is unnecessary). + * Return true if the codepoint is a Bidi control character that may result + * in RTL directionality and therefore needs to trigger bidi resolution; + * return false otherwise. + */ + inline bool IsBidiControlRTL(char16_t aChar) { + return aChar == RLM_CHAR || + aChar == RLE_CHAR || + aChar == RLO_CHAR || + aChar == RLI_CHAR || + aChar == ALM_CHAR; + } + + /** + * Give a 16-bit (UTF-16) text buffer and length * @return true if the string contains right-to-left characters */ - bool HasRTLChars(const nsAString& aString); + bool HasRTLChars(const char16_t* aText, uint32_t aLength); + + /** + * Convenience function to call the above on an nsAString. + */ + inline bool HasRTLChars(const nsAString& aString) { + return HasRTLChars(aString.BeginReading(), aString.Length()); + } // These values are shared with Preferences dialog // ------------------ @@ -248,6 +279,17 @@ typedef enum nsCharType nsCharType; ((0xfe70 <= (c)) && ((c) <= 0xfefc))) #define IS_IN_SMP_RTL_BLOCK(c) (((0x10800 <= (c)) && ((c) <= 0x10fff)) || \ ((0x1e800 <= (c)) && ((c) <= 0x1eFFF))) +// Due to the supplementary-plane RTL blocks being identifiable from the +// high surrogate without examining the low surrogate, it is correct to +// use this by-code-unit check on potentially astral text without doing +// the math to decode surrogate pairs into code points. However, unpaired +// high surrogates that are RTL high surrogates then count as RTL even +// though, if replaced by the REPLACEMENT CHARACTER, it would not be +// RTL. +#define UTF16_CODE_UNIT_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) || \ + (IS_RTL_PRESENTATION_FORM(c)) || \ + (c) == 0xD802 || (c) == 0xD803 || \ + (c) == 0xD83A || (c) == 0xD83B) #define UCS2_CHAR_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) || \ (IS_RTL_PRESENTATION_FORM(c))) #define UTF32_CHAR_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) || \ diff --git a/layout/base/nsBidiPresUtils.cpp b/layout/base/nsBidiPresUtils.cpp index b1b6f9754..1184da66a 100644 --- a/layout/base/nsBidiPresUtils.cpp +++ b/layout/base/nsBidiPresUtils.cpp @@ -110,6 +110,7 @@ struct BidiParagraphData { nsTArray mLinePerFrame; nsDataHashtable mContentToFrameIndex; bool mIsVisual; + bool mRequiresBidi; bool mReset; nsBidiLevel mParaLevel; nsIContent* mPrevContent; @@ -121,10 +122,14 @@ struct BidiParagraphData { void Init(nsBlockFrame *aBlockFrame) { mBidiEngine = new nsBidi(); + mRequiresBidi = false; mPrevContent = nullptr; mParagraphDepth = 0; mParaLevel = nsBidiPresUtils::BidiLevelFromStyle(aBlockFrame->StyleContext()); + if (mParaLevel > 0) { + mRequiresBidi = true; + } mIsVisual = aBlockFrame->PresContext()->IsVisualMode(); if (mIsVisual) { @@ -664,14 +669,64 @@ nsBidiPresUtils::Resolve(nsBlockFrame* aBlockFrame) char16_t ch = GetBidiControl(aBlockFrame->StyleContext(), kOverride); if (ch != 0) { bpd.PushBidiControl(ch); + bpd.mRequiresBidi = true; + } else if (!bpd.mRequiresBidi) { + // If there are no unicode-bidi properties and no RTL characters in the + // block's content, then it is pure LTR and we can skip the rest of bidi + // resolution. + nsIContent* currContent = nullptr; + for (nsBlockFrame* block = aBlockFrame; block; + block = static_cast(block->GetNextContinuation())) { + block->RemoveStateBits(NS_BLOCK_NEEDS_BIDI_RESOLUTION); + if (/* !bpd.mRequiresBidi && */ + ChildListMayRequireBidi(block->PrincipalChildList().FirstChild(), + &currContent)) { + bpd.mRequiresBidi = true; + + // Optimization for TenFourFox issue 482: + // It's safe to break here if mRequiresBidi is true because we'll + // pull the state bits off in the loop below (if we didn't, we would + // essentially cause bug 1362423). This also allows us to reduce + // checking mRequiresBidi all the time. + break; + } +#if(0) + // XXX: Unnecessary work given that the below isn't executed. + // If we need bidi support for overflow, we need to enable this too. + /* if (!bpd.mRequiresBidi) { */ + nsBlockFrame::FrameLines* overflowLines = block->GetOverflowLines(); + if (overflowLines) { // XXX: see below + if (ChildListMayRequireBidi(overflowLines->mFrames.FirstChild(), + &currContent)) { + bpd.mRequiresBidi = true; + break; // as above + } + } + /* } */ +#endif + } + if (!bpd.mRequiresBidi) { + return NS_OK; + } } + for (nsBlockFrame* block = aBlockFrame; block; block = static_cast(block->GetNextContinuation())) { block->RemoveStateBits(NS_BLOCK_NEEDS_BIDI_RESOLUTION); nsBlockInFlowLineIterator lineIter(block, block->begin_lines()); bpd.ResetForNewBlock(); TraverseFrames(aBlockFrame, &lineIter, block->GetFirstPrincipalChild(), &bpd); - // XXX what about overflow lines? +#if(0) + nsBlockFrame::FrameLines* overflowLines = block->GetOverflowLines(); + // XXX: Not sure if this is going to break anything. + // It's safe to do above (from bug 1358275) because that doesn't actually + // bidi-process the overflow lines; it just checks if we need to. + if (overflowLines) { + nsBlockInFlowLineIterator it(block, overflowLines->mLines.begin(), true); + bpd.ResetForNewBlock(); + TraverseFrames(aBlockFrame, &it, overflowLines->mFrames.FirstChild(), &bpd); + } +#endif } if (ch != 0) { @@ -1241,6 +1296,56 @@ nsBidiPresUtils::TraverseFrames(nsBlockFrame* aBlockFrame, MOZ_ASSERT(initialLineContainer == aLineIter->GetContainer()); } +bool +nsBidiPresUtils::ChildListMayRequireBidi(nsIFrame* aFirstChild, + nsIContent** aCurrContent) +{ + MOZ_ASSERT(!aFirstChild || !aFirstChild->GetPrevSibling(), + "Expecting to traverse from the start of a child list"); + + for (nsIFrame* childFrame = aFirstChild; childFrame; + childFrame = childFrame->GetNextSibling()) { + + nsIFrame* frame = childFrame; + + // If the real frame for a placeholder is a first-letter frame, we need to + // consider its contents for potential Bidi resolution. + if (childFrame->GetType() == nsGkAtoms::placeholderFrame) { + nsIFrame* realFrame = + nsPlaceholderFrame::GetRealFrameForPlaceholder(childFrame); + if (realFrame->GetType() == nsGkAtoms::letterFrame) { + frame = realFrame; + } + } + + // If unicode-bidi properties are present, we should do bidi resolution. + nsStyleContext* sc = frame->StyleContext(); + if (GetBidiControl(sc, kOverrideOrEmbed)) { + return true; + } + + if (IsBidiLeaf(frame)) { + if (frame->GetType() == nsGkAtoms::textFrame) { + // Check whether the text frame has any RTL characters; if so, bidi + // resolution will be needed. + nsIContent* content = frame->GetContent(); + if (content != *aCurrContent) { + *aCurrContent = content; + const nsTextFragment* txt = content->GetText(); + if (txt->Is2b() && HasRTLChars(txt->Get2b(), txt->GetLength())) { + return true; + } + } + } + } else if (ChildListMayRequireBidi(frame->PrincipalChildList().FirstChild(), + aCurrContent)) { + return true; + } + } + + return false; +} + void nsBidiPresUtils::ResolveParagraphWithinBlock(nsBlockFrame* aBlockFrame, BidiParagraphData* aBpd) diff --git a/layout/base/nsBidiPresUtils.h b/layout/base/nsBidiPresUtils.h index 2c8b4677b..a7a72a30b 100644 --- a/layout/base/nsBidiPresUtils.h +++ b/layout/base/nsBidiPresUtils.h @@ -399,6 +399,23 @@ private: nsIFrame* aCurrentFrame, BidiParagraphData* aBpd); + /** + * Perform a recursive "pre-traversal" of the child frames of a block or + * inline container frame, to determine whether full bidi resolution is + * actually needed. + * This explores the same frames as TraverseFrames (above), but is less + * expensive and may allow us to avoid performing the full TraverseFrames + * operation. + * @param aFirstChild frame to start traversal from + * @param[in/out] aCurrContent the content node that we've most recently + * scanned for RTL characters (so that when descendant frames refer + * to the same content, we can avoid repeatedly scanning it). + * @return true if it finds that bidi is (or may be) required, + * false if no potentially-bidi content is present. + */ + static bool ChildListMayRequireBidi(nsIFrame* aFirstChild, + nsIContent** aCurrContent); + /** * Position ruby content frames (ruby base/text frame). * Called from RepositionRubyFrame.