mirror of
https://github.com/classilla/tenfourfox.git
synced 2024-11-05 02:06:25 +00:00
398 lines
14 KiB
C++
398 lines
14 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
/* tokenization of CSS style sheets */
|
|
|
|
#ifndef nsCSSScanner_h___
|
|
#define nsCSSScanner_h___
|
|
|
|
#include "nsString.h"
|
|
|
|
namespace mozilla {
|
|
namespace css {
|
|
class ErrorReporter;
|
|
} // namespace css
|
|
} // namespace mozilla
|
|
|
|
// Token types; in close but not perfect correspondence to the token
|
|
// categorization in section 4.1.1 of CSS2.1. (The deviations are all
|
|
// the fault of css3-selectors, which has requirements that can only be
|
|
// met by changing the generic tokenization.) The comment on each line
|
|
// illustrates the form of each identifier.
|
|
|
|
enum nsCSSTokenType {
|
|
// White space of any kind. No value fields are used. Note that
|
|
// comments do *not* count as white space; comments separate tokens
|
|
// but are not themselves tokens.
|
|
eCSSToken_Whitespace, //
|
|
// A comment.
|
|
eCSSToken_Comment, // /*...*/
|
|
|
|
// Identifier-like tokens. mIdent is the text of the identifier.
|
|
// The difference between ID and Hash is: if the text after the #
|
|
// would have been a valid Ident if the # hadn't been there, the
|
|
// scanner produces an ID token. Otherwise it produces a Hash token.
|
|
// (This distinction is required by css3-selectors.)
|
|
eCSSToken_Ident, // word
|
|
eCSSToken_Function, // word(
|
|
eCSSToken_AtKeyword, // @word
|
|
eCSSToken_ID, // #word
|
|
eCSSToken_Hash, // #0word
|
|
|
|
// Numeric tokens. mNumber is the floating-point value of the
|
|
// number, and mHasSign indicates whether there was an explicit sign
|
|
// (+ or -) in front of the number. If mIntegerValid is true, the
|
|
// number had the lexical form of an integer, and mInteger is its
|
|
// integer value. Lexically integer values outside the range of a
|
|
// 32-bit signed number are clamped to the maximum values; mNumber
|
|
// will indicate a 'truer' value in that case. Percentage tokens
|
|
// are always considered not to be integers, even if their numeric
|
|
// value is integral (100% => mNumber = 1.0). For Dimension
|
|
// tokens, mIdent holds the text of the unit.
|
|
eCSSToken_Number, // 1 -5 +2e3 3.14159 7.297352e-3
|
|
eCSSToken_Dimension, // 24px 8.5in
|
|
eCSSToken_Percentage, // 85% 1280.4%
|
|
|
|
// String-like tokens. In all cases, mIdent holds the text
|
|
// belonging to the string, and mSymbol holds the delimiter
|
|
// character, which may be ', ", or zero (only for unquoted URLs).
|
|
// Bad_String and Bad_URL tokens are emitted when the closing
|
|
// delimiter or parenthesis was missing.
|
|
eCSSToken_String, // 'foo bar' "foo bar"
|
|
eCSSToken_Bad_String, // 'foo bar
|
|
eCSSToken_URL, // url(foobar) url("foo bar")
|
|
eCSSToken_Bad_URL, // url(foo
|
|
|
|
// Any one-character symbol. mSymbol holds the character.
|
|
eCSSToken_Symbol, // . ; { } ! *
|
|
|
|
// Match operators. These are single tokens rather than pairs of
|
|
// Symbol tokens because css3-selectors forbids the presence of
|
|
// comments between the two characters. No value fields are used;
|
|
// the token type indicates which operator.
|
|
eCSSToken_Includes, // ~=
|
|
eCSSToken_Dashmatch, // |=
|
|
eCSSToken_Beginsmatch, // ^=
|
|
eCSSToken_Endsmatch, // $=
|
|
eCSSToken_Containsmatch, // *=
|
|
|
|
// Unicode-range token: currently used only in @font-face.
|
|
// The lexical rule for this token includes several forms that are
|
|
// semantically invalid. Therefore, mIdent always holds the
|
|
// complete original text of the token (so we can print it
|
|
// accurately in diagnostics), and mIntegerValid is true iff the
|
|
// token is semantically valid. In that case, mInteger holds the
|
|
// lowest value included in the range, and mInteger2 holds the
|
|
// highest value included in the range.
|
|
eCSSToken_URange, // U+007e U+01?? U+2000-206F
|
|
|
|
// HTML comment delimiters, ignored as a unit when they appear at
|
|
// the top level of a style sheet, for compatibility with websites
|
|
// written for compatibility with pre-CSS browsers. This token type
|
|
// subsumes the css2.1 CDO and CDC tokens, which are always treated
|
|
// the same by the parser. mIdent holds the text of the token, for
|
|
// diagnostics.
|
|
eCSSToken_HTMLComment, // <!-- -->
|
|
};
|
|
|
|
// Classification of tokens used to determine if a "/**/" string must be
|
|
// inserted if pasting token streams together when serializing. We include
|
|
// values corresponding to eCSSToken_Dashmatch and eCSSToken_Containsmatch,
|
|
// as css-syntax does not treat these as whole tokens, but we will still
|
|
// need to insert a "/**/" string between a '|' delim and a '|=' dashmatch
|
|
// and between a '/' delim and a '*=' containsmatch.
|
|
//
|
|
// https://drafts.csswg.org/css-syntax/#serialization
|
|
enum nsCSSTokenSerializationType {
|
|
eCSSTokenSerialization_Nothing,
|
|
eCSSTokenSerialization_Whitespace,
|
|
eCSSTokenSerialization_AtKeyword_or_Hash,
|
|
eCSSTokenSerialization_Number,
|
|
eCSSTokenSerialization_Dimension,
|
|
eCSSTokenSerialization_Percentage,
|
|
eCSSTokenSerialization_URange,
|
|
eCSSTokenSerialization_URL_or_BadURL,
|
|
eCSSTokenSerialization_Function,
|
|
eCSSTokenSerialization_Ident,
|
|
eCSSTokenSerialization_CDC,
|
|
eCSSTokenSerialization_DashMatch,
|
|
eCSSTokenSerialization_ContainsMatch,
|
|
eCSSTokenSerialization_Symbol_Hash, // '#'
|
|
eCSSTokenSerialization_Symbol_At, // '@'
|
|
eCSSTokenSerialization_Symbol_Dot_or_Plus, // '.', '+'
|
|
eCSSTokenSerialization_Symbol_Minus, // '-'
|
|
eCSSTokenSerialization_Symbol_OpenParen, // '('
|
|
eCSSTokenSerialization_Symbol_Question, // '?'
|
|
eCSSTokenSerialization_Symbol_Assorted, // '$', '^', '~'
|
|
eCSSTokenSerialization_Symbol_Equals, // '='
|
|
eCSSTokenSerialization_Symbol_Bar, // '|'
|
|
eCSSTokenSerialization_Symbol_Slash, // '/'
|
|
eCSSTokenSerialization_Symbol_Asterisk, // '*'
|
|
eCSSTokenSerialization_Other // anything else
|
|
};
|
|
|
|
// A single token returned from the scanner. mType is always
|
|
// meaningful; comments above describe which other fields are
|
|
// meaningful for which token types.
|
|
struct nsCSSToken {
|
|
nsAutoString mIdent;
|
|
float mNumber;
|
|
int32_t mInteger;
|
|
int32_t mInteger2;
|
|
nsCSSTokenType mType;
|
|
char16_t mSymbol;
|
|
bool mIntegerValid;
|
|
bool mHasSign;
|
|
|
|
nsCSSToken()
|
|
: mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace),
|
|
mSymbol('\0'), mIntegerValid(false), mHasSign(false)
|
|
{}
|
|
|
|
bool IsSymbol(char16_t aSymbol) const {
|
|
return mType == eCSSToken_Symbol && mSymbol == aSymbol;
|
|
}
|
|
|
|
void AppendToString(nsString& aBuffer) const;
|
|
};
|
|
|
|
// Represents an nsCSSScanner's saved position in the input buffer.
|
|
class nsCSSScannerPosition {
|
|
friend class nsCSSScanner;
|
|
public:
|
|
nsCSSScannerPosition() : mInitialized(false) { }
|
|
|
|
uint32_t LineNumber() {
|
|
MOZ_ASSERT(mInitialized);
|
|
return mLineNumber;
|
|
}
|
|
|
|
uint32_t LineOffset() {
|
|
MOZ_ASSERT(mInitialized);
|
|
return mLineOffset;
|
|
}
|
|
|
|
private:
|
|
uint32_t mOffset;
|
|
uint32_t mLineNumber;
|
|
uint32_t mLineOffset;
|
|
uint32_t mTokenLineNumber;
|
|
uint32_t mTokenLineOffset;
|
|
uint32_t mTokenOffset;
|
|
bool mInitialized;
|
|
};
|
|
|
|
enum nsCSSScannerExclude {
|
|
// Return all tokens, including whitespace and comments.
|
|
eCSSScannerExclude_None,
|
|
// Include whitespace but exclude comments.
|
|
eCSSScannerExclude_Comments,
|
|
// Exclude whitespace and comments.
|
|
eCSSScannerExclude_WhitespaceAndComments
|
|
};
|
|
|
|
// nsCSSScanner tokenizes an input stream using the CSS2.1 forward
|
|
// compatible tokenization rules. Used internally by nsCSSParser;
|
|
// not available for use by other code.
|
|
class nsCSSScanner {
|
|
public:
|
|
// |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0|
|
|
// when the line number is unknown. The scanner does not take
|
|
// ownership of |aBuffer|, so the caller must be sure to keep it
|
|
// alive for the lifetime of the scanner.
|
|
nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber);
|
|
~nsCSSScanner();
|
|
|
|
void SetErrorReporter(mozilla::css::ErrorReporter* aReporter) {
|
|
mReporter = aReporter;
|
|
}
|
|
// Set whether or not we are processing SVG
|
|
void SetSVGMode(bool aSVGMode) {
|
|
mSVGMode = aSVGMode;
|
|
}
|
|
bool IsSVGMode() const {
|
|
return mSVGMode;
|
|
}
|
|
|
|
// Reset or check whether a BAD_URL or BAD_STRING token has been seen.
|
|
void ClearSeenBadToken() { mSeenBadToken = false; }
|
|
bool SeenBadToken() const { return mSeenBadToken; }
|
|
|
|
// Reset or check whether a "var(" FUNCTION token has been seen.
|
|
void ClearSeenVariableReference() { mSeenVariableReference = false; }
|
|
bool SeenVariableReference() const { return mSeenVariableReference; }
|
|
|
|
// Get the 1-based line number of the last character of
|
|
// the most recently processed token.
|
|
uint32_t GetLineNumber() const { return mTokenLineNumber; }
|
|
|
|
// Get the 0-based column number of the first character of
|
|
// the most recently processed token.
|
|
uint32_t GetColumnNumber() const
|
|
{ return mTokenOffset - mTokenLineOffset; }
|
|
|
|
uint32_t GetTokenOffset() const
|
|
{ return mTokenOffset; }
|
|
|
|
uint32_t GetTokenEndOffset() const
|
|
{ return mOffset; }
|
|
|
|
// Get the text of the line containing the first character of
|
|
// the most recently processed token.
|
|
nsDependentSubstring GetCurrentLine() const;
|
|
|
|
// Get the next token. Return false on EOF. aTokenResult is filled
|
|
// in with the data for the token. aSkip controls whether
|
|
// whitespace and/or comment tokens are ever returned.
|
|
bool Next(nsCSSToken& aTokenResult, nsCSSScannerExclude aSkip);
|
|
|
|
// Get the body of an URL token (everything after the 'url(').
|
|
// This is exposed for use by nsCSSParser::ParseMozDocumentRule,
|
|
// which, for historical reasons, must make additional function
|
|
// tokens behave like url(). Please do not add new uses to the
|
|
// parser.
|
|
void NextURL(nsCSSToken& aTokenResult);
|
|
|
|
// This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg,
|
|
// because "2n-1" is a single DIMENSION token, and "n-1" is a single
|
|
// IDENT token, but the :nth() selector syntax wants to interpret
|
|
// them the same as "2n -1" and "n -1" respectively. Please do not
|
|
// add new uses to the parser.
|
|
//
|
|
// Note: this function may not be used to back up over a line boundary.
|
|
void Backup(uint32_t n);
|
|
|
|
// Starts recording the input stream from the current position.
|
|
void StartRecording();
|
|
|
|
// Abandons recording of the input stream.
|
|
void StopRecording();
|
|
|
|
// Stops recording of the input stream and appends the recorded
|
|
// input to aBuffer.
|
|
void StopRecording(nsString& aBuffer);
|
|
|
|
// Returns the length of the current recording.
|
|
uint32_t RecordingLength() const;
|
|
|
|
#ifdef DEBUG
|
|
bool IsRecording() const;
|
|
#endif
|
|
|
|
// Stores the current scanner offset into the specified object.
|
|
void SavePosition(nsCSSScannerPosition& aState);
|
|
|
|
// Resets the scanner offset to a position saved by SavePosition.
|
|
void RestoreSavedPosition(const nsCSSScannerPosition& aState);
|
|
|
|
enum EOFCharacters {
|
|
eEOFCharacters_None = 0x0000,
|
|
|
|
// to handle \<EOF> inside strings
|
|
eEOFCharacters_DropBackslash = 0x0001,
|
|
|
|
// to handle \<EOF> outside strings
|
|
eEOFCharacters_ReplacementChar = 0x0002,
|
|
|
|
// to close comments
|
|
eEOFCharacters_Asterisk = 0x0004,
|
|
eEOFCharacters_Slash = 0x0008,
|
|
|
|
// to close double-quoted strings
|
|
eEOFCharacters_DoubleQuote = 0x0010,
|
|
|
|
// to close single-quoted strings
|
|
eEOFCharacters_SingleQuote = 0x0020,
|
|
|
|
// to close URLs
|
|
eEOFCharacters_CloseParen = 0x0040,
|
|
};
|
|
|
|
// Appends any characters to the specified string the input stream to make the
|
|
// last token not rely on special EOF handling behavior.
|
|
//
|
|
// If eEOFCharacters_DropBackslash is in aEOFCharacters, it is ignored.
|
|
static void AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
|
|
nsAString& aString);
|
|
|
|
EOFCharacters GetEOFCharacters() const {
|
|
#ifdef DEBUG
|
|
AssertEOFCharactersValid(mEOFCharacters);
|
|
#endif
|
|
return mEOFCharacters;
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
static void AssertEOFCharactersValid(uint32_t c);
|
|
#endif
|
|
|
|
protected:
|
|
int32_t Peek(uint32_t n = 0);
|
|
void Advance(uint32_t n = 1);
|
|
void AdvanceLine();
|
|
|
|
void SkipWhitespace();
|
|
void SkipComment();
|
|
|
|
bool GatherEscape(nsString& aOutput, bool aInString);
|
|
bool GatherText(uint8_t aClass, nsString& aIdent);
|
|
|
|
bool ScanIdent(nsCSSToken& aResult);
|
|
bool ScanAtKeyword(nsCSSToken& aResult);
|
|
bool ScanHash(nsCSSToken& aResult);
|
|
bool ScanNumber(nsCSSToken& aResult);
|
|
bool ScanString(nsCSSToken& aResult);
|
|
bool ScanURange(nsCSSToken& aResult);
|
|
|
|
void SetEOFCharacters(uint32_t aEOFCharacters);
|
|
void AddEOFCharacters(uint32_t aEOFCharacters);
|
|
|
|
const char16_t *mBuffer;
|
|
uint32_t mOffset;
|
|
uint32_t mCount;
|
|
|
|
uint32_t mLineNumber;
|
|
uint32_t mLineOffset;
|
|
|
|
uint32_t mTokenLineNumber;
|
|
uint32_t mTokenLineOffset;
|
|
uint32_t mTokenOffset;
|
|
|
|
uint32_t mRecordStartOffset;
|
|
EOFCharacters mEOFCharacters;
|
|
|
|
mozilla::css::ErrorReporter *mReporter;
|
|
|
|
// True if we are in SVG mode; false in "normal" CSS
|
|
bool mSVGMode;
|
|
bool mRecording;
|
|
bool mSeenBadToken;
|
|
bool mSeenVariableReference;
|
|
};
|
|
|
|
// Token for the grid-template-areas micro-syntax
|
|
// http://dev.w3.org/csswg/css-grid/#propdef-grid-template-areas
|
|
struct MOZ_STACK_CLASS nsCSSGridTemplateAreaToken {
|
|
nsAutoString mName; // Empty for a null cell, non-empty for a named cell
|
|
bool isTrash; // True for a trash token, mName is ignored in this case.
|
|
};
|
|
|
|
// Scanner for the grid-template-areas micro-syntax
|
|
class nsCSSGridTemplateAreaScanner {
|
|
public:
|
|
explicit nsCSSGridTemplateAreaScanner(const nsAString& aBuffer);
|
|
|
|
// Get the next token. Return false on EOF.
|
|
// aTokenResult is filled in with the data for the token.
|
|
bool Next(nsCSSGridTemplateAreaToken& aTokenResult);
|
|
|
|
private:
|
|
const char16_t *mBuffer;
|
|
uint32_t mOffset;
|
|
uint32_t mCount;
|
|
};
|
|
|
|
#endif /* nsCSSScanner_h___ */
|