mirror of
https://github.com/classilla/tenfourfox.git
synced 2025-01-06 09:29:35 +00:00
385 lines
13 KiB
C++
385 lines
13 KiB
C++
/*
 * Copyright (c) 2005-2007 Henri Sivonen
 * Copyright (c) 2007-2015 Mozilla Foundation
 * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
 * Foundation, and Opera Software ASA.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
|
|
|
|
/*
 * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
 * Please edit Tokenizer.java instead and regenerate.
 */
|
|
|
|
#ifndef nsHtml5Tokenizer_h
|
|
#define nsHtml5Tokenizer_h
|
|
|
|
#include "nsIAtom.h"
|
|
#include "nsHtml5AtomTable.h"
|
|
#include "nsString.h"
|
|
#include "nsIContent.h"
|
|
#include "nsTraceRefcnt.h"
|
|
#include "jArray.h"
|
|
#include "nsHtml5DocumentMode.h"
|
|
#include "nsHtml5ArrayCopy.h"
|
|
#include "nsHtml5NamedCharacters.h"
|
|
#include "nsHtml5NamedCharactersAccel.h"
|
|
#include "nsHtml5Atoms.h"
|
|
#include "nsAHtml5TreeBuilderState.h"
|
|
#include "nsHtml5Macros.h"
|
|
#include "nsHtml5Highlighter.h"
|
|
#include "nsHtml5TokenizerLoopPolicies.h"
|
|
|
|
// Forward declarations of collaborating types. Only the names are needed at
// this point; no definition is required by these declarations themselves.
class nsHtml5StreamParser;

class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5AttributeName;
class nsHtml5ElementName;
class nsHtml5HtmlAttributes;
class nsHtml5UTF16Buffer;
class nsHtml5StateSnapshot;
class nsHtml5Portability;
|
|
|
|
|
|
/**
 * Tokenizer class declaration (this header is generated from Tokenizer.java;
 * regenerate rather than hand-editing the logic).
 *
 * The class holds the tokenizer's working buffers and bookkeeping fields and
 * declares the entry points (start / tokenizeBuffer / eof / end). Most
 * member functions are only declared here; their definitions live outside
 * this header. Additional members are pulled in textually from
 * nsHtml5TokenizerHSupplement.h at the end of the class body.
 *
 * NOTE(review): the access specifiers flip frequently because the member
 * order is kept exactly as generated; do not reorder members.
 */
class nsHtml5Tokenizer
{
  private:
    // Static UTF-16 character sequences; only declared here.
    static char16_t LT_GT[];
    static char16_t LT_SOLIDUS[];
    static char16_t RSQB_RSQB[];
    static char16_t REPLACEMENT_CHARACTER[];
    static char16_t LF[];
    static char16_t CDATA_LSQB[];
    static char16_t OCTYPE[];
    static char16_t UBLIC[];
    static char16_t YSTEM[];
    // Static arrays named after element names; presumably the expected
    // end-tag names for the special text states -- TODO confirm in the .cpp.
    static staticJArray<char16_t,int32_t> TITLE_ARR;
    static staticJArray<char16_t,int32_t> SCRIPT_ARR;
    static staticJArray<char16_t,int32_t> STYLE_ARR;
    static staticJArray<char16_t,int32_t> PLAINTEXT_ARR;
    static staticJArray<char16_t,int32_t> XMP_ARR;
    static staticJArray<char16_t,int32_t> TEXTAREA_ARR;
    static staticJArray<char16_t,int32_t> IFRAME_ARR;
    static staticJArray<char16_t,int32_t> NOEMBED_ARR;
    static staticJArray<char16_t,int32_t> NOSCRIPT_ARR;
    static staticJArray<char16_t,int32_t> NOFRAMES_ARR;
  protected:
    nsHtml5TreeBuilder* tokenHandler;
    nsHtml5StreamParser* encodingDeclarationHandler;
    // Set to true by silentCarriageReturn().
    bool lastCR;
    int32_t stateSave;
  private:
    int32_t returnStateSave;
  protected:
    int32_t index;
  private:
    bool forceQuirks;
    char16_t additional;
    int32_t entCol;
    int32_t firstCharKey;
    int32_t lo;
    int32_t hi;
    int32_t candidate;
    int32_t charRefBufMark;
  protected:
    int32_t value;
  private:
    bool seenDigits;
  protected:
    int32_t cstart;
  private:
    nsString* publicId;
    nsString* systemId;
    // General-purpose accumulation buffer; strBufLen is the number of
    // code units currently stored in it (see the *StrBuf helpers below).
    autoJArray<char16_t,int32_t> strBuf;
    int32_t strBufLen;
    // Second accumulation buffer with its own fill count (see the
    // *CharRefBuf helpers below).
    autoJArray<char16_t,int32_t> charRefBuf;
    int32_t charRefBufLen;
    autoJArray<char16_t,int32_t> bmpChar;
    autoJArray<char16_t,int32_t> astralChar;
  protected:
    nsHtml5ElementName* endTagExpectation;
  private:
    jArray<char16_t,int32_t> endTagExpectationAsArray;
  protected:
    bool endTag;
  private:
    nsHtml5ElementName* tagName;
  protected:
    nsHtml5AttributeName* attributeName;
  private:
    nsIAtom* doctypeName;
    nsString* publicIdentifier;
    nsString* systemIdentifier;
    nsHtml5HtmlAttributes* attributes;
    bool newAttributesEachTime;
    bool shouldSuspend;
  protected:
    bool confident;
  private:
    // Line counter; incremented by silentCarriageReturn()/silentLineFeed()
    // and exposed through getLineNumber().
    int32_t line;
    nsHtml5AtomTable* interner;
    bool viewingXmlSource;
  public:
    nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource);
    void setInterner(nsHtml5AtomTable* interner);
    void initLocation(nsString* newPublicId, nsString* newSystemId);
    bool isViewingXmlSource();
    void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsIAtom* endTagExpectation);
    void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsHtml5ElementName* endTagExpectation);
  private:
    void endTagExpectationToArray();
  public:
    void setLineNumber(int32_t line);

    /** Returns the current value of the line counter. */
    inline int32_t getLineNumber()
    {
      return line;
    }

    nsHtml5HtmlAttributes* emptyAttributes();
  private:
    /**
     * Appends one code unit to charRefBuf.
     * Release-asserts that the buffer still has room, so an overrun aborts
     * instead of writing past the end of the buffer.
     */
    inline void appendCharRefBuf(char16_t c)
    {
      MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length, "Attempted to overrun charRefBuf!");
      charRefBuf[charRefBufLen++] = c;
    }

    /**
     * Resets charRefBuf so that it holds exactly the single code unit c.
     * No capacity check is made here.
     */
    inline void clearCharRefBufAndAppend(char16_t c)
    {
      charRefBuf[0] = c;
      charRefBufLen = 1;
    }

    void emitOrAppendCharRefBuf(int32_t returnState);

    /**
     * Resets strBuf so that it holds exactly the single code unit c.
     * No capacity check is made here.
     */
    inline void clearStrBufAndAppend(char16_t c)
    {
      strBuf[0] = c;
      strBufLen = 1;
    }

    /** Empties strBuf by resetting its fill count; storage is untouched. */
    inline void clearStrBuf()
    {
      strBufLen = 0;
    }

    /**
     * Appends one code unit to strBuf.
     *
     * The assertion states the expectation that space was reserved before
     * the call. If the buffer is nevertheless exactly full (presumably only
     * observable in builds where the assertion is compiled out -- confirm),
     * EnsureBufferSpace(1) is asked to grow it, and a failure to do so is
     * fatal via MOZ_CRASH. EnsureBufferSpace is not declared in this part
     * of the header; presumably it comes from the supplement include.
     */
    inline void appendStrBuf(char16_t c)
    {
      MOZ_ASSERT(strBufLen < strBuf.length, "Previous buffer length insufficient.");
      if (MOZ_UNLIKELY(strBufLen == strBuf.length)) {
        if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) {
          MOZ_CRASH("Unable to recover from buffer reallocation failure");
        }
      }
      strBuf[strBufLen++] = c;
    }

  protected:
    nsString* strBufToString();
  private:
    void strBufToDoctypeName();
    void emitStrBuf();

    /** Appends a single '-' to strBuf. */
    inline void appendSecondHyphenToBogusComment()
    {
      appendStrBuf('-');
    }

    /**
     * Reports the consecutive-hyphens error via errConsecutiveHyphens() and
     * then appends c to strBuf. errConsecutiveHyphens is not declared in
     * this part of the header; presumably it comes from the supplement
     * include.
     */
    inline void adjustDoubleHyphenAndAppendToStrBufAndErr(char16_t c)
    {
      errConsecutiveHyphens();
      appendStrBuf(c);
    }

    void appendStrBuf(char16_t* buffer, int32_t offset, int32_t length);

    /** Appends the current contents of charRefBuf to strBuf. */
    inline void appendCharRefBufToStrBuf()
    {
      appendStrBuf(charRefBuf, 0, charRefBufLen);
    }

    void emitComment(int32_t provisionalHyphens, int32_t pos);
  protected:
    void flushChars(char16_t* buf, int32_t pos);
  private:
    void strBufToElementNameString();
    int32_t emitCurrentTagToken(bool selfClosing, int32_t pos);
    void attributeNameComplete();
    void addAttributeWithoutValue();
    void addAttributeWithValue();
  public:
    void start();
    bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
  private:
    template<class P> int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume, int32_t returnState, int32_t endPos);
    void initDoctypeFields();

    /**
     * Carriage-return variant of the double-hyphen append: performs the
     * silentCarriageReturn() bookkeeping, then reports the error and
     * appends. Note that the code unit appended is '\n', not '\r'.
     */
    inline void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
    {
      silentCarriageReturn();
      adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
    }

    /**
     * Line-feed variant of the double-hyphen append: performs the
     * silentLineFeed() bookkeeping, then reports the error and appends '\n'.
     */
    inline void adjustDoubleHyphenAndAppendToStrBufLineFeed()
    {
      silentLineFeed();
      adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
    }

    /** Performs the silentLineFeed() bookkeeping and appends '\n' to strBuf. */
    inline void appendStrBufLineFeed()
    {
      silentLineFeed();
      appendStrBuf('\n');
    }

    /**
     * Performs the silentCarriageReturn() bookkeeping and appends '\n'
     * (not '\r') to strBuf.
     */
    inline void appendStrBufCarriageReturn()
    {
      silentCarriageReturn();
      appendStrBuf('\n');
    }

  protected:
    /** Increments the line counter and sets lastCR to true. */
    inline void silentCarriageReturn()
    {
      ++line;
      lastCR = true;
    }

    /** Increments the line counter; lastCR is left unchanged. */
    inline void silentLineFeed()
    {
      ++line;
    }

  private:
    void emitCarriageReturn(char16_t* buf, int32_t pos);
    void emitReplacementCharacter(char16_t* buf, int32_t pos);
    void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos);
    void setAdditionalAndRememberAmpersandLocation(char16_t add);
    void bogusDoctype();
    void bogusDoctypeWithoutQuirks();
    void handleNcrValue(int32_t returnState);
  public:
    void eof();
  private:
    void emitDoctypeToken(int32_t pos);
  protected:
    /**
     * Returns buf[pos] unchanged. No bounds checking is done; the caller
     * must pass a valid index.
     */
    inline char16_t checkChar(char16_t* buf, int32_t pos)
    {
      return buf[pos];
    }

  public:
    bool internalEncodingDeclaration(nsString* internalCharset);
  private:
    void emitOrAppendTwo(const char16_t* val, int32_t returnState);
    void emitOrAppendOne(const char16_t* val, int32_t returnState);
  public:
    void end();
    void requestSuspension();
    bool isInDataState();
    void resetToDataState();
    void loadState(nsHtml5Tokenizer* other);
    void initializeWithoutStarting();
    void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler);
    ~nsHtml5Tokenizer();
    static void initializeStatics();
    static void releaseStatics();

// Additional member declarations are included textually into the class body.
#include "nsHtml5TokenizerHSupplement.h"
};
|
|
|
|
/*
 * Integer constants for the tokenizer. Judging by their names these are the
 * state identifiers passed around as the int32_t `state` / `returnState`
 * values -- confirm against the implementation file.
 */

/*
 * Mask that clears bit 0. (x & MASK) == 0 holds exactly for the values
 * 0 (DATA) and 1 (RCDATA). The expansion is parenthesized so that it stays
 * a single operand wherever the macro is used.
 */
#define NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK (~1)
#define NS_HTML5TOKENIZER_DATA 0
#define NS_HTML5TOKENIZER_RCDATA 1
#define NS_HTML5TOKENIZER_SCRIPT_DATA 2
#define NS_HTML5TOKENIZER_RAWTEXT 3
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED 4
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED 5
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED 6
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED 7
#define NS_HTML5TOKENIZER_PLAINTEXT 8
#define NS_HTML5TOKENIZER_TAG_OPEN 9
#define NS_HTML5TOKENIZER_CLOSE_TAG_OPEN 10
#define NS_HTML5TOKENIZER_TAG_NAME 11
#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME 12
#define NS_HTML5TOKENIZER_ATTRIBUTE_NAME 13
#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME 14
#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE 15
#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED 16
#define NS_HTML5TOKENIZER_BOGUS_COMMENT 17
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN 18
#define NS_HTML5TOKENIZER_DOCTYPE 19
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME 20
#define NS_HTML5TOKENIZER_DOCTYPE_NAME 21
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME 22
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER 23
#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED 24
#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED 25
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER 26
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER 27
#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED 28
#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED 29
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER 30
#define NS_HTML5TOKENIZER_BOGUS_DOCTYPE 31
#define NS_HTML5TOKENIZER_COMMENT_START 32
#define NS_HTML5TOKENIZER_COMMENT_START_DASH 33
#define NS_HTML5TOKENIZER_COMMENT 34
#define NS_HTML5TOKENIZER_COMMENT_END_DASH 35
#define NS_HTML5TOKENIZER_COMMENT_END 36
#define NS_HTML5TOKENIZER_COMMENT_END_BANG 37
#define NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME 38
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN 39
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE 40
#define NS_HTML5TOKENIZER_DOCTYPE_UBLIC 41
#define NS_HTML5TOKENIZER_DOCTYPE_YSTEM 42
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD 43
#define NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS 44
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD 45
#define NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE 46
#define NS_HTML5TOKENIZER_CONSUME_NCR 47
#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL 48
#define NS_HTML5TOKENIZER_HEX_NCR_LOOP 49
/* "NRC" (sic): the spelling is kept because renaming would break users. */
#define NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP 50
#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE 51
#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE_RECONSUME 52
#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP 53
#define NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG 54
#define NS_HTML5TOKENIZER_CDATA_START 55
#define NS_HTML5TOKENIZER_CDATA_SECTION 56
#define NS_HTML5TOKENIZER_CDATA_RSQB 57
#define NS_HTML5TOKENIZER_CDATA_RSQB_RSQB 58
#define NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN 59
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START 60
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH 61
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH 62
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH 63
#define NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN 64
#define NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN 65
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN 66
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START 67
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED 68
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN 69
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 70
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 71
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 72
#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION 73
#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK 74
/*
 * Evaluates to 0xD7C0 (0xD800 minus 0x40). Presumably the lead-surrogate
 * offset used when encoding a supplementary code point as a UTF-16
 * surrogate pair -- confirm against the implementation file.
 */
#define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10))
|
|
|
|
|
|
#endif
|
|
|