mirror of
https://github.com/classilla/tenfourfox.git
synced 2024-06-06 22:29:34 +00:00
167 lines
6.5 KiB
C++
167 lines
6.5 KiB
C++
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#ifndef mozInlineSpellWordUtil_h
|
|
#define mozInlineSpellWordUtil_h
|
|
|
|
#include "nsCOMPtr.h"
|
|
#include "nsIDOMDocument.h"
|
|
#include "nsIDocument.h"
|
|
#include "nsString.h"
|
|
#include "nsTArray.h"
|
|
|
|
//#define DEBUG_SPELLCHECK
|
|
|
|
class nsRange;
|
|
class nsINode;
|
|
|
|
/**
|
|
* This class extracts text from the DOM and builds it into a single string.
|
|
* The string includes whitespace breaks whereever non-inline elements begin
|
|
* and end. This string is broken into "real words", following somewhat
|
|
* complex rules; for example substrings that look like URLs or
|
|
* email addresses are treated as single words, but otherwise many kinds of
|
|
* punctuation are treated as word separators. GetNextWord provides a way
|
|
* to iterate over these "real words".
|
|
*
|
|
* The basic operation is:
|
|
*
|
|
* 1. Call Init with the weak pointer to the editor that you're using.
|
|
* 2. Call SetEnd to set where you want to stop spellchecking. We'll stop
|
|
* at the word boundary after that. If SetEnd is not called, we'll stop
|
|
* at the end of the document's root element.
|
|
* 3. Call SetPosition to initialize the current position inside the
|
|
* previously given range.
|
|
* 4. Call GetNextWord over and over until it returns false.
|
|
*/
|
|
|
|
class mozInlineSpellWordUtil
|
|
{
|
|
public:
|
|
struct NodeOffset {
|
|
nsINode* mNode;
|
|
int32_t mOffset;
|
|
|
|
NodeOffset(nsINode* aNode, int32_t aOffset) :
|
|
mNode(aNode), mOffset(aOffset) {}
|
|
};
|
|
|
|
mozInlineSpellWordUtil()
|
|
: mRootNode(nullptr),
|
|
mSoftBegin(nullptr, 0), mSoftEnd(nullptr, 0),
|
|
mNextWordIndex(-1), mSoftTextValid(false) {}
|
|
|
|
nsresult Init(nsWeakPtr aWeakEditor);
|
|
|
|
nsresult SetEnd(nsINode* aEndNode, int32_t aEndOffset);
|
|
|
|
// sets the current position, this should be inside the range. If we are in
|
|
// the middle of a word, we'll move to its start.
|
|
nsresult SetPosition(nsINode* aNode, int32_t aOffset);
|
|
|
|
// Given a point inside or immediately following a word, this returns the
|
|
// DOM range that exactly encloses that word's characters. The current
|
|
// position will be at the end of the word. This will find the previous
|
|
// word if the current position is space, so if you care that the point is
|
|
// inside the word, you should check the range.
|
|
//
|
|
// THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called
|
|
// before you actually generate the range you are interested in and iterate
|
|
// the words in it.
|
|
nsresult GetRangeForWord(nsIDOMNode* aWordNode, int32_t aWordOffset,
|
|
nsRange** aRange);
|
|
|
|
// Moves to the the next word in the range, and retrieves it's text and range.
|
|
// An empty word and a nullptr range are returned when we are done checking.
|
|
// aSkipChecking will be set if the word is "special" and shouldn't be
|
|
// checked (e.g., an email address).
|
|
nsresult GetNextWord(nsAString& aText, nsRange** aRange,
|
|
bool* aSkipChecking);
|
|
|
|
// Call to normalize some punctuation. This function takes an autostring
|
|
// so we can access characters directly.
|
|
static void NormalizeWord(nsSubstring& aWord);
|
|
|
|
nsIDOMDocument* GetDOMDocument() const { return mDOMDocument; }
|
|
nsIDocument* GetDocument() const { return mDocument; }
|
|
nsINode* GetRootNode() { return mRootNode; }
|
|
|
|
private:
|
|
|
|
// cached stuff for the editor, set by Init
|
|
nsCOMPtr<nsIDOMDocument> mDOMDocument;
|
|
nsCOMPtr<nsIDocument> mDocument;
|
|
|
|
// range to check, see SetPosition and SetEnd
|
|
nsINode* mRootNode;
|
|
NodeOffset mSoftBegin;
|
|
NodeOffset mSoftEnd;
|
|
|
|
// DOM text covering the soft range, with newlines added at block boundaries
|
|
nsString mSoftText;
|
|
// A list of where we extracted text from, ordered by mSoftTextOffset. A given
|
|
// DOM node appears at most once in this list.
|
|
struct DOMTextMapping {
|
|
NodeOffset mNodeOffset;
|
|
int32_t mSoftTextOffset;
|
|
int32_t mLength;
|
|
|
|
DOMTextMapping(NodeOffset aNodeOffset, int32_t aSoftTextOffset, int32_t aLength)
|
|
: mNodeOffset(aNodeOffset), mSoftTextOffset(aSoftTextOffset),
|
|
mLength(aLength) {}
|
|
};
|
|
nsTArray<DOMTextMapping> mSoftTextDOMMapping;
|
|
|
|
// A list of the "real words" in mSoftText, ordered by mSoftTextOffset
|
|
struct RealWord {
|
|
int32_t mSoftTextOffset;
|
|
int32_t mLength;
|
|
bool mCheckableWord;
|
|
|
|
RealWord(int32_t aOffset, int32_t aLength, bool aCheckable)
|
|
: mSoftTextOffset(aOffset), mLength(aLength), mCheckableWord(aCheckable) {}
|
|
int32_t EndOffset() const { return mSoftTextOffset + mLength; }
|
|
};
|
|
nsTArray<RealWord> mRealWords;
|
|
int32_t mNextWordIndex;
|
|
|
|
bool mSoftTextValid;
|
|
|
|
void InvalidateWords() { mSoftTextValid = false; }
|
|
void EnsureWords();
|
|
|
|
int32_t MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset);
|
|
// Map an offset into mSoftText to a DOM position. Note that two DOM positions
|
|
// can map to the same mSoftText offset, e.g. given nodes A=aaaa and B=bbbb
|
|
// forming aaaabbbb, (A,4) and (B,0) give the same string offset. So,
|
|
// aHintBefore controls which position we return ... if aHint is eEnd
|
|
// then the position indicates the END of a range so we return (A,4). Otherwise
|
|
// the position indicates the START of a range so we return (B,0).
|
|
enum DOMMapHint { HINT_BEGIN, HINT_END };
|
|
NodeOffset MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset,
|
|
DOMMapHint aHint);
|
|
// Finds the index of the real word containing aSoftTextOffset, or -1 if none
|
|
// If it's exactly between two words, then if aHint is HINT_BEGIN, return the
|
|
// later word (favouring the assumption that it's the BEGINning of a word),
|
|
// otherwise return the earlier word (assuming it's the END of a word).
|
|
// If aSearchForward is true, then if we don't find a word at the given
|
|
// position, search forward until we do find a word and return that (if found).
|
|
int32_t FindRealWordContaining(int32_t aSoftTextOffset, DOMMapHint aHint,
|
|
bool aSearchForward);
|
|
|
|
// build mSoftText and mSoftTextDOMMapping
|
|
void BuildSoftText();
|
|
// Build mRealWords array
|
|
void BuildRealWords();
|
|
|
|
void SplitDOMWord(int32_t aStart, int32_t aEnd);
|
|
|
|
// Convenience functions, object must be initialized
|
|
nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd, nsRange** aRange);
|
|
nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange);
|
|
};
|
|
|
|
#endif
|