#482: better bidi M1358275 M1392181 M1428774 + backbugs

This commit is contained in:
Cameron Kaiser 2018-03-12 20:10:32 -07:00
parent 8617914099
commit 0c70c7c824
6 changed files with 210 additions and 46 deletions

View File

@ -345,22 +345,22 @@ nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBi
// Should we optimize for aData.Length() == 0?
CheckedUint32 length = mState.mLength;
length += aLength;
if (!length.isValid()) {
return false;
// FYI: Don't use CheckedInt in this method since here is very hot path
// in some performance tests.
if (MOZ_UNLIKELY(NS_MAX_TEXT_FRAGMENT_LENGTH - mState.mLength < aLength)) {
return false; // Would be overflown if we'd keep handling.
}
if (mState.mIs2b) {
length *= sizeof(char16_t);
if (!length.isValid()) {
return false;
size_t size = mState.mLength + aLength;
if (MOZ_UNLIKELY(SIZE_MAX / sizeof(char16_t) < size)) {
return false; // Would be overflown if we'd keep handling.
}
size *= sizeof(char16_t);
// Already a 2-byte string so the result will be too
char16_t* buff = static_cast<char16_t*>(realloc(m2b, length.value()));
if (!buff) {
char16_t* buff = static_cast<char16_t*>(realloc(m2b, size));
if (MOZ_UNLIKELY(!buff)) {
return false;
}
@ -379,15 +379,16 @@ nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBi
int32_t first16bit = FirstNon8Bit(aBuffer, aBuffer + aLength);
if (first16bit != -1) { // aBuffer contains no non-8bit character
length *= sizeof(char16_t);
if (!length.isValid()) {
return false;
size_t size = mState.mLength + aLength;
if (MOZ_UNLIKELY((SIZE_MAX / sizeof(char16_t)) < size)) {
return false; // Would be overflown if we'd keep handling.
}
size *= sizeof(char16_t);
// The old data was 1-byte, but the new is not so we have to expand it
// all to 2-byte
char16_t* buff = static_cast<char16_t*>(malloc(length.value()));
if (!buff) {
char16_t* buff = static_cast<char16_t*>(malloc(size));
if (MOZ_UNLIKELY(!buff)) {
return false;
}
@ -414,16 +415,18 @@ nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBi
}
// The new and the old data is all 1-byte
size_t size = mState.mLength + aLength;
MOZ_ASSERT(sizeof(char) == 1);
char* buff;
if (mState.mInHeap) {
buff = static_cast<char*>(realloc(const_cast<char*>(m1b), length.value()));
if (!buff) {
buff = static_cast<char*>(realloc(const_cast<char*>(m1b), size));
if (MOZ_UNLIKELY(!buff)) {
return false;
}
}
else {
buff = static_cast<char*>(malloc(length.value()));
if (!buff) {
buff = static_cast<char*>(malloc(size));
if (MOZ_UNLIKELY(!buff)) {
return false;
}
@ -461,21 +464,8 @@ void
nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength)
{
if (mState.mIs2b && !mState.mIsBidi) {
const char16_t* cp = aBuffer;
const char16_t* end = cp + aLength;
while (cp < end) {
char16_t ch1 = *cp++;
uint32_t utf32Char = ch1;
if (NS_IS_HIGH_SURROGATE(ch1) &&
cp < end &&
NS_IS_LOW_SURROGATE(*cp)) {
char16_t ch2 = *cp++;
utf32Char = SURROGATE_TO_UCS4(ch1, ch2);
}
if (UTF32_CHAR_IS_BIDI(utf32Char) || IsBidiControl(utf32Char)) {
mState.mIsBidi = true;
break;
}
if (HasRTLChars(aBuffer, aLength)) {
mState.mIsBidi = true;
}
}
}

View File

@ -213,9 +213,13 @@ public:
uint32_t mInHeap : 1;
uint32_t mIs2b : 1;
uint32_t mIsBidi : 1;
// Note that when you change the bits of mLength, you also need to change
// NS_MAX_TEXT_FRAGMENT_LENGTH.
uint32_t mLength : 29;
};
#define NS_MAX_TEXT_FRAGMENT_LENGTH (static_cast<uint32_t>(0x1FFFFFFF))
size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
private:

View File

@ -5,6 +5,10 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsBidiUtils.h"
namespace mozilla {
static const uint32_t kMinRTLChar = 0x0590;
} // namespace mozilla;
#define ARABIC_TO_HINDI_DIGIT_INCREMENT (START_HINDI_DIGITS - START_ARABIC_DIGITS)
#define PERSIAN_TO_HINDI_DIGIT_INCREMENT (START_HINDI_DIGITS - START_FARSI_DIGITS)
#define ARABIC_TO_PERSIAN_DIGIT_INCREMENT (START_FARSI_DIGITS - START_ARABIC_DIGITS)
@ -82,16 +86,18 @@ nsresult HandleNumbers(char16_t* aBuffer, uint32_t aSize, uint32_t aNumFlag)
return NS_OK;
}
bool HasRTLChars(const nsAString& aString)
bool HasRTLChars(const char16_t* aText, uint32_t aLength)
{
// This is used to determine whether to enable bidi if a string has
// right-to-left characters. To simplify things, anything that could be a
// surrogate or RTL presentation form is covered just by testing >= 0xD800).
// It's fine to enable bidi in rare cases where it actually isn't needed.
int32_t length = aString.Length();
for (int32_t i = 0; i < length; i++) {
char16_t ch = aString.CharAt(i);
if (ch >= 0xD800 || IS_IN_BMP_RTL_BLOCK(ch)) {
// This is used to determine whether a string has right-to-left characters
// that mean it will require bidi processing.
const char16_t* cp = aText;
const char16_t* end = cp + aLength;
while (cp < end) {
char16_t ch = *cp++;
if (ch < mozilla::kMinRTLChar) {
continue;
}
if (UTF16_CODE_UNIT_IS_BIDI(ch) || IsBidiControlRTL(ch)) {
return true;
}
}

View File

@ -110,10 +110,19 @@ typedef enum nsCharType nsCharType;
* Return false, otherwise
*/
#define LRM_CHAR 0x200e
#define RLM_CHAR 0x200f
#define LRE_CHAR 0x202a
#define RLE_CHAR 0x202b
#define PDF_CHAR 0x202c
#define LRO_CHAR 0x202d
#define RLO_CHAR 0x202e
#define LRI_CHAR 0x2066
#define RLI_CHAR 0x2067
#define FSI_CHAR 0x2068
#define PDI_CHAR 0x2069
#define ALM_CHAR 0x061C
inline bool IsBidiControl(uint32_t aChar) {
return ((LRE_CHAR <= aChar && aChar <= RLO_CHAR) ||
@ -123,10 +132,32 @@ typedef enum nsCharType nsCharType;
}
/**
* Give an nsString.
* Give a UTF-16 codepoint (changed for TenFourFox; we don't use this for
* UTF-32 characters, so the conversion is unnecessary).
* Return true if the codepoint is a Bidi control character that may result
* in RTL directionality and therefore needs to trigger bidi resolution;
* return false otherwise.
*/
inline bool IsBidiControlRTL(char16_t aChar) {
return aChar == RLM_CHAR ||
aChar == RLE_CHAR ||
aChar == RLO_CHAR ||
aChar == RLI_CHAR ||
aChar == ALM_CHAR;
}
/**
* Give a 16-bit (UTF-16) text buffer and length
* @return true if the string contains right-to-left characters
*/
bool HasRTLChars(const nsAString& aString);
bool HasRTLChars(const char16_t* aText, uint32_t aLength);
/**
* Convenience function to call the above on an nsAString.
*/
inline bool HasRTLChars(const nsAString& aString) {
return HasRTLChars(aString.BeginReading(), aString.Length());
}
// These values are shared with Preferences dialog
// ------------------
@ -248,6 +279,17 @@ typedef enum nsCharType nsCharType;
((0xfe70 <= (c)) && ((c) <= 0xfefc)))
#define IS_IN_SMP_RTL_BLOCK(c) (((0x10800 <= (c)) && ((c) <= 0x10fff)) || \
((0x1e800 <= (c)) && ((c) <= 0x1eFFF)))
// Due to the supplementary-plane RTL blocks being identifiable from the
// high surrogate without examining the low surrogate, it is correct to
// use this by-code-unit check on potentially astral text without doing
// the math to decode surrogate pairs into code points. However, unpaired
// high surrogates that are RTL high surrogates then count as RTL even
// though, if replaced by the REPLACEMENT CHARACTER, it would not be
// RTL.
#define UTF16_CODE_UNIT_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) || \
(IS_RTL_PRESENTATION_FORM(c)) || \
(c) == 0xD802 || (c) == 0xD803 || \
(c) == 0xD83A || (c) == 0xD83B)
#define UCS2_CHAR_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) || \
(IS_RTL_PRESENTATION_FORM(c)))
#define UTF32_CHAR_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) || \

View File

@ -110,6 +110,7 @@ struct BidiParagraphData {
nsTArray<nsLineBox*> mLinePerFrame;
nsDataHashtable<nsISupportsHashKey, int32_t> mContentToFrameIndex;
bool mIsVisual;
bool mRequiresBidi;
bool mReset;
nsBidiLevel mParaLevel;
nsIContent* mPrevContent;
@ -121,10 +122,14 @@ struct BidiParagraphData {
void Init(nsBlockFrame *aBlockFrame)
{
mBidiEngine = new nsBidi();
mRequiresBidi = false;
mPrevContent = nullptr;
mParagraphDepth = 0;
mParaLevel = nsBidiPresUtils::BidiLevelFromStyle(aBlockFrame->StyleContext());
if (mParaLevel > 0) {
mRequiresBidi = true;
}
mIsVisual = aBlockFrame->PresContext()->IsVisualMode();
if (mIsVisual) {
@ -664,14 +669,64 @@ nsBidiPresUtils::Resolve(nsBlockFrame* aBlockFrame)
char16_t ch = GetBidiControl(aBlockFrame->StyleContext(), kOverride);
if (ch != 0) {
bpd.PushBidiControl(ch);
bpd.mRequiresBidi = true;
} else if (!bpd.mRequiresBidi) {
// If there are no unicode-bidi properties and no RTL characters in the
// block's content, then it is pure LTR and we can skip the rest of bidi
// resolution.
nsIContent* currContent = nullptr;
for (nsBlockFrame* block = aBlockFrame; block;
block = static_cast<nsBlockFrame*>(block->GetNextContinuation())) {
block->RemoveStateBits(NS_BLOCK_NEEDS_BIDI_RESOLUTION);
if (/* !bpd.mRequiresBidi && */
ChildListMayRequireBidi(block->PrincipalChildList().FirstChild(),
&currContent)) {
bpd.mRequiresBidi = true;
// Optimization for TenFourFox issue 482:
// It's safe to break here if mRequiresBidi is true because we'll
// pull the state bits off in the loop below (if we didn't, we would
// essentially cause bug 1362423). This also allows us to reduce
// checking mRequiresBidi all the time.
break;
}
#if(0)
// XXX: Unnecessary work given that the below isn't executed.
// If we need bidi support for overflow, we need to enable this too.
/* if (!bpd.mRequiresBidi) { */
nsBlockFrame::FrameLines* overflowLines = block->GetOverflowLines();
if (overflowLines) { // XXX: see below
if (ChildListMayRequireBidi(overflowLines->mFrames.FirstChild(),
&currContent)) {
bpd.mRequiresBidi = true;
break; // as above
}
}
/* } */
#endif
}
if (!bpd.mRequiresBidi) {
return NS_OK;
}
}
for (nsBlockFrame* block = aBlockFrame; block;
block = static_cast<nsBlockFrame*>(block->GetNextContinuation())) {
block->RemoveStateBits(NS_BLOCK_NEEDS_BIDI_RESOLUTION);
nsBlockInFlowLineIterator lineIter(block, block->begin_lines());
bpd.ResetForNewBlock();
TraverseFrames(aBlockFrame, &lineIter, block->GetFirstPrincipalChild(), &bpd);
// XXX what about overflow lines?
#if(0)
nsBlockFrame::FrameLines* overflowLines = block->GetOverflowLines();
// XXX: Not sure if this is going to break anything.
// It's safe to do above (from bug 1358275) because that doesn't actually
// bidi-process the overflow lines; it just checks if we need to.
if (overflowLines) {
nsBlockInFlowLineIterator it(block, overflowLines->mLines.begin(), true);
bpd.ResetForNewBlock();
TraverseFrames(aBlockFrame, &it, overflowLines->mFrames.FirstChild(), &bpd);
}
#endif
}
if (ch != 0) {
@ -1241,6 +1296,56 @@ nsBidiPresUtils::TraverseFrames(nsBlockFrame* aBlockFrame,
MOZ_ASSERT(initialLineContainer == aLineIter->GetContainer());
}
bool
nsBidiPresUtils::ChildListMayRequireBidi(nsIFrame* aFirstChild,
nsIContent** aCurrContent)
{
MOZ_ASSERT(!aFirstChild || !aFirstChild->GetPrevSibling(),
"Expecting to traverse from the start of a child list");
for (nsIFrame* childFrame = aFirstChild; childFrame;
childFrame = childFrame->GetNextSibling()) {
nsIFrame* frame = childFrame;
// If the real frame for a placeholder is a first-letter frame, we need to
// consider its contents for potential Bidi resolution.
if (childFrame->GetType() == nsGkAtoms::placeholderFrame) {
nsIFrame* realFrame =
nsPlaceholderFrame::GetRealFrameForPlaceholder(childFrame);
if (realFrame->GetType() == nsGkAtoms::letterFrame) {
frame = realFrame;
}
}
// If unicode-bidi properties are present, we should do bidi resolution.
nsStyleContext* sc = frame->StyleContext();
if (GetBidiControl(sc, kOverrideOrEmbed)) {
return true;
}
if (IsBidiLeaf(frame)) {
if (frame->GetType() == nsGkAtoms::textFrame) {
// Check whether the text frame has any RTL characters; if so, bidi
// resolution will be needed.
nsIContent* content = frame->GetContent();
if (content != *aCurrContent) {
*aCurrContent = content;
const nsTextFragment* txt = content->GetText();
if (txt->Is2b() && HasRTLChars(txt->Get2b(), txt->GetLength())) {
return true;
}
}
}
} else if (ChildListMayRequireBidi(frame->PrincipalChildList().FirstChild(),
aCurrContent)) {
return true;
}
}
return false;
}
void
nsBidiPresUtils::ResolveParagraphWithinBlock(nsBlockFrame* aBlockFrame,
BidiParagraphData* aBpd)

View File

@ -399,6 +399,23 @@ private:
nsIFrame* aCurrentFrame,
BidiParagraphData* aBpd);
/**
* Perform a recursive "pre-traversal" of the child frames of a block or
* inline container frame, to determine whether full bidi resolution is
* actually needed.
* This explores the same frames as TraverseFrames (above), but is less
* expensive and may allow us to avoid performing the full TraverseFrames
* operation.
* @param aFirstChild frame to start traversal from
* @param[in/out] aCurrContent the content node that we've most recently
* scanned for RTL characters (so that when descendant frames refer
* to the same content, we can avoid repeatedly scanning it).
* @return true if it finds that bidi is (or may be) required,
* false if no potentially-bidi content is present.
*/
static bool ChildListMayRequireBidi(nsIFrame* aFirstChild,
nsIContent** aCurrContent);
/**
* Position ruby content frames (ruby base/text frame).
* Called from RepositionRubyFrame.