mirror of
https://github.com/classilla/tenfourfox.git
synced 2024-09-27 14:56:16 +00:00
1080 lines
39 KiB
C++
1080 lines
39 KiB
C++
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
|
* vim: set ts=8 sts=4 et sw=4 tw=99:
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#ifndef frontend_TokenStream_h
|
|
#define frontend_TokenStream_h
|
|
|
|
// JS lexical scanner interface.
|
|
|
|
#include "mozilla/ArrayUtils.h"
|
|
#include "mozilla/Assertions.h"
|
|
#include "mozilla/Attributes.h"
|
|
#include "mozilla/DebugOnly.h"
|
|
#include "mozilla/PodOperations.h"
|
|
#include "mozilla/UniquePtr.h"
|
|
|
|
#include <stdarg.h>
|
|
#include <stddef.h>
|
|
#include <stdio.h>
|
|
|
|
#include "jscntxt.h"
|
|
#include "jspubtd.h"
|
|
|
|
#include "frontend/TokenKind.h"
|
|
#include "js/Vector.h"
|
|
#include "vm/RegExpObject.h"
|
|
|
|
struct KeywordInfo;
|
|
|
|
namespace js {
|
|
namespace frontend {
|
|
|
|
class AutoAwaitIsKeyword;
|
|
|
|
struct TokenPos {
|
|
uint32_t begin; // Offset of the token's first char.
|
|
uint32_t end; // Offset of 1 past the token's last char.
|
|
|
|
TokenPos() {}
|
|
TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {}
|
|
|
|
// Return a TokenPos that covers left, right, and anything in between.
|
|
static TokenPos box(const TokenPos& left, const TokenPos& right) {
|
|
MOZ_ASSERT(left.begin <= left.end);
|
|
MOZ_ASSERT(left.end <= right.begin);
|
|
MOZ_ASSERT(right.begin <= right.end);
|
|
return TokenPos(left.begin, right.end);
|
|
}
|
|
|
|
bool operator==(const TokenPos& bpos) const {
|
|
return begin == bpos.begin && end == bpos.end;
|
|
}
|
|
|
|
bool operator!=(const TokenPos& bpos) const {
|
|
return begin != bpos.begin || end != bpos.end;
|
|
}
|
|
|
|
bool operator <(const TokenPos& bpos) const {
|
|
return begin < bpos.begin;
|
|
}
|
|
|
|
bool operator <=(const TokenPos& bpos) const {
|
|
return begin <= bpos.begin;
|
|
}
|
|
|
|
bool operator >(const TokenPos& bpos) const {
|
|
return !(*this <= bpos);
|
|
}
|
|
|
|
bool operator >=(const TokenPos& bpos) const {
|
|
return !(*this < bpos);
|
|
}
|
|
|
|
bool encloses(const TokenPos& pos) const {
|
|
return begin <= pos.begin && pos.end <= end;
|
|
}
|
|
};
|
|
|
|
enum DecimalPoint { NoDecimal = false, HasDecimal = true };
|
|
|
|
class TokenStream;
|
|
|
|
struct Token
|
|
{
|
|
private:
|
|
// Sometimes the parser needs to inform the tokenizer to interpret
|
|
// subsequent text in a particular manner: for example, to tokenize a
|
|
// keyword as an identifier, not as the actual keyword, on the right-hand
|
|
// side of a dotted property access. Such information is communicated to
|
|
// the tokenizer as a Modifier when getting the next token.
|
|
//
|
|
// Ideally this definition would reside in TokenStream as that's the real
|
|
// user, but the debugging-use of it here causes a cyclic dependency (and
|
|
// C++ provides no way to forward-declare an enum inside a class). So
|
|
// define it here, then typedef it into TokenStream with static consts to
|
|
// bring the initializers into scope.
|
|
enum Modifier
|
|
{
|
|
// Normal operation.
|
|
None,
|
|
|
|
// Looking for an operand, not an operator. In practice, this means
|
|
// that when '/' is seen, we look for a regexp instead of just returning
|
|
// TOK_DIV.
|
|
Operand,
|
|
|
|
// Treat keywords as names by returning TOK_NAME.
|
|
KeywordIsName,
|
|
|
|
// Treat subsequent characters as the tail of a template literal, after
|
|
// a template substitution, beginning with a "}", continuing with zero
|
|
// or more template literal characters, and ending with either "${" or
|
|
// the end of the template literal. For example:
|
|
//
|
|
// var entity = "world";
|
|
// var s = `Hello ${entity}!`;
|
|
// ^ TemplateTail context
|
|
TemplateTail,
|
|
};
|
|
enum ModifierException
|
|
{
|
|
NoException,
|
|
|
|
// Used in following 2 cases:
|
|
// a) After |yield| we look for a token on the same line that starts an
|
|
// expression (Operand): |yield <expr>|. If no token is found, the
|
|
// |yield| stands alone, and the next token on a subsequent line must
|
|
// be: a comma continuing a comma expression, a semicolon terminating
|
|
// the statement that ended with |yield|, or the start of another
|
|
// statement (possibly an expression statement). The comma/semicolon
|
|
// cases are gotten as operators (None), contrasting with Operand
|
|
// earlier.
|
|
// b) After an arrow function with a block body in an expression
|
|
// statement, the next token must be: a colon in a conditional
|
|
// expression, a comma continuing a comma expression, a semicolon
|
|
// terminating the statement, or the token on a subsequent line that is
|
|
// the start of another statement (possibly an expression statement).
|
|
// Colon is gotten as operator (None), and it should only be gotten in
|
|
// conditional expression and missing it results in SyntaxError.
|
|
// Comma/semicolon cases are also gotten as operators (None), and 4th
|
|
// case is gotten after them. If no comma/semicolon found but EOL,
|
|
// the next token should be gotten as operand in 4th case (especially if
|
|
// '/' is the first character). So we should peek the token as
|
|
// operand before try getting colon/comma/semicolon.
|
|
// See also the comment in Parser::assignExpr().
|
|
NoneIsOperand,
|
|
|
|
// If a semicolon is inserted automatically, the next token is already
|
|
// gotten with None, but we expect Operand.
|
|
OperandIsNone,
|
|
|
|
// If name of method definition is `get` or `set`, the next token is
|
|
// already gotten with KeywordIsName, but we expect None.
|
|
NoneIsKeywordIsName,
|
|
};
|
|
friend class TokenStream;
|
|
|
|
public:
|
|
TokenKind type; // char value or above enumerator
|
|
TokenPos pos; // token position in file
|
|
union {
|
|
private:
|
|
friend struct Token;
|
|
PropertyName* name; // non-numeric atom
|
|
JSAtom* atom; // potentially-numeric atom
|
|
struct {
|
|
double value; // floating point number
|
|
DecimalPoint decimalPoint; // literal contains '.'
|
|
} number;
|
|
RegExpFlag reflags; // regexp flags; use tokenbuf to access
|
|
// regexp chars
|
|
} u;
|
|
#ifdef DEBUG
|
|
Modifier modifier; // Modifier used to get this token
|
|
ModifierException modifierException; // Exception for this modifier
|
|
#endif
|
|
|
|
// This constructor is necessary only for MSVC 2013 and how it compiles the
|
|
// initialization of TokenStream::tokens. That field is initialized as
|
|
// tokens() in the constructor init-list. This *should* zero the entire
|
|
// array, then (because Token has a non-trivial constructor, because
|
|
// TokenPos has a user-provided constructor) call the implicit Token
|
|
// constructor on each element, which would call the TokenPos constructor
|
|
// for Token::pos and do nothing. (All of which is equivalent to just
|
|
// zeroing TokenStream::tokens.) But MSVC 2013 (2010/2012 don't have this
|
|
// bug) doesn't zero out each element, so we need this extra constructor to
|
|
// make it do the right thing. (Token is used primarily by reference or
|
|
// pointer, and it's only initialized a very few places, so having a
|
|
// user-defined constructor won't hurt perf.) See also bug 920318.
|
|
Token()
|
|
: pos(0, 0)
|
|
{
|
|
MOZ_MAKE_MEM_UNDEFINED(&type, sizeof(type));
|
|
}
|
|
|
|
// Mutators
|
|
|
|
void setName(PropertyName* name) {
|
|
MOZ_ASSERT(type == TOK_NAME);
|
|
u.name = name;
|
|
}
|
|
|
|
void setAtom(JSAtom* atom) {
|
|
MOZ_ASSERT(type == TOK_STRING ||
|
|
type == TOK_TEMPLATE_HEAD ||
|
|
type == TOK_NO_SUBS_TEMPLATE);
|
|
u.atom = atom;
|
|
}
|
|
|
|
void setRegExpFlags(js::RegExpFlag flags) {
|
|
MOZ_ASSERT(type == TOK_REGEXP);
|
|
MOZ_ASSERT((flags & AllFlags) == flags);
|
|
u.reflags = flags;
|
|
}
|
|
|
|
void setNumber(double n, DecimalPoint decimalPoint) {
|
|
MOZ_ASSERT(type == TOK_NUMBER);
|
|
u.number.value = n;
|
|
u.number.decimalPoint = decimalPoint;
|
|
}
|
|
|
|
// Type-safe accessors
|
|
|
|
PropertyName* name() const {
|
|
MOZ_ASSERT(type == TOK_NAME);
|
|
return u.name->JSAtom::asPropertyName(); // poor-man's type verification
|
|
}
|
|
|
|
bool nameContainsEscape() const {
|
|
PropertyName* n = name();
|
|
return pos.begin + n->length() != pos.end;
|
|
}
|
|
|
|
JSAtom* atom() const {
|
|
MOZ_ASSERT(type == TOK_STRING ||
|
|
type == TOK_TEMPLATE_HEAD ||
|
|
type == TOK_NO_SUBS_TEMPLATE);
|
|
return u.atom;
|
|
}
|
|
|
|
js::RegExpFlag regExpFlags() const {
|
|
MOZ_ASSERT(type == TOK_REGEXP);
|
|
MOZ_ASSERT((u.reflags & AllFlags) == u.reflags);
|
|
return u.reflags;
|
|
}
|
|
|
|
double number() const {
|
|
MOZ_ASSERT(type == TOK_NUMBER);
|
|
return u.number.value;
|
|
}
|
|
|
|
DecimalPoint decimalPoint() const {
|
|
MOZ_ASSERT(type == TOK_NUMBER);
|
|
return u.number.decimalPoint;
|
|
}
|
|
};
|
|
|
|
struct CompileError {
|
|
JSErrorReport report;
|
|
char* message;
|
|
ErrorArgumentsType argumentsType;
|
|
CompileError() : message(nullptr), argumentsType(ArgumentsAreUnicode) {}
|
|
~CompileError();
|
|
void throwError(JSContext* cx);
|
|
|
|
private:
|
|
// CompileError owns raw allocated memory, so disable assignment and copying
|
|
// for safety.
|
|
void operator=(const CompileError&) = delete;
|
|
CompileError(const CompileError&) = delete;
|
|
};
|
|
|
|
// Ideally, tokenizing would be entirely independent of context. But the
|
|
// strict mode flag, which is in SharedContext, affects tokenizing, and
|
|
// TokenStream needs to see it.
|
|
//
|
|
// This class is a tiny back-channel from TokenStream to the strict mode flag
|
|
// that avoids exposing the rest of SharedContext to TokenStream.
|
|
//
|
|
class StrictModeGetter {
|
|
public:
|
|
virtual bool strictMode() = 0;
|
|
};
|
|
|
|
// TokenStream is the lexical scanner for Javascript source text.
|
|
//
|
|
// It takes a buffer of char16_t characters and linearly scans it into |Token|s.
|
|
// Internally the class uses a four element circular buffer |tokens| of
|
|
// |Token|s. As an index for |tokens|, the member |cursor| points to the
|
|
// current token.
|
|
// Calls to getToken() increase |cursor| by one and return the new current
|
|
// token. If a TokenStream was just created, the current token is initialized
|
|
// with random data (i.e. not initialized). It is therefore important that
|
|
// one of the first four member functions listed below is called first.
|
|
// The circular buffer lets us go back up to two tokens from the last
|
|
// scanned token. Internally, the relative number of backward steps that were
|
|
// taken (via ungetToken()) after the last token was scanned is stored in
|
|
// |lookahead|.
|
|
//
|
|
// The following table lists in which situations it is safe to call each listed
|
|
// function. No checks are made by the functions in non-debug builds.
|
|
//
|
|
// Function Name | Precondition; changes to |lookahead|
|
|
// ------------------+---------------------------------------------------------
|
|
// getToken | none; if |lookahead > 0| then |lookahead--|
|
|
// peekToken | none; if |lookahead == 0| then |lookahead == 1|
|
|
// peekTokenSameLine | none; if |lookahead == 0| then |lookahead == 1|
|
|
// matchToken | none; if |lookahead > 0| and the match succeeds then
|
|
// | |lookahead--|
|
|
// consumeKnownToken | none; if |lookahead > 0| then |lookahead--|
|
|
// ungetToken | 0 <= |lookahead| <= |maxLookahead - 1|; |lookahead++|
|
|
//
|
|
// The behavior of the token scanning process (see getTokenInternal()) can be
|
|
// modified by calling one of the first four above listed member functions with
|
|
// an optional argument of type Modifier. However, the modifier will be
|
|
// ignored unless |lookahead == 0| holds. Due to constraints of the grammar,
|
|
// this turns out not to be a problem in practice. See the
|
|
// mozilla.dev.tech.js-engine.internals thread entitled 'Bug in the scanner?'
|
|
// for more details:
|
|
// https://groups.google.com/forum/?fromgroups=#!topic/mozilla.dev.tech.js-engine.internals/2JLH5jRcr7E).
|
|
//
|
|
// The methods seek() and tell() allow to rescan from a previous visited
|
|
// location of the buffer.
|
|
//
|
|
class MOZ_STACK_CLASS TokenStream
|
|
{
|
|
// Unicode separators that are treated as line terminators, in addition to \n, \r.
|
|
enum {
|
|
LINE_SEPARATOR = 0x2028,
|
|
PARA_SEPARATOR = 0x2029
|
|
};
|
|
|
|
static const size_t ntokens = 4; // 1 current + 2 lookahead, rounded
|
|
// to power of 2 to avoid divmod by 3
|
|
static const unsigned maxLookahead = 2;
|
|
static const unsigned ntokensMask = ntokens - 1;
|
|
|
|
public:
|
|
typedef Vector<char16_t, 32> CharBuffer;
|
|
|
|
TokenStream(ExclusiveContext* cx, const ReadOnlyCompileOptions& options,
|
|
const char16_t* base, size_t length, StrictModeGetter* smg);
|
|
|
|
~TokenStream();
|
|
|
|
bool checkOptions();
|
|
|
|
// Accessors.
|
|
const Token& currentToken() const { return tokens[cursor]; }
|
|
bool isCurrentTokenType(TokenKind type) const {
|
|
return currentToken().type == type;
|
|
}
|
|
const CharBuffer& getTokenbuf() const { return tokenbuf; }
|
|
const char* getFilename() const { return filename; }
|
|
unsigned getLineno() const { return lineno; }
|
|
unsigned getColumn() const { return userbuf.offset() - linebase - 1; }
|
|
bool getMutedErrors() const { return mutedErrors; }
|
|
JSVersion versionNumber() const { return VersionNumber(options().version); }
|
|
JSVersion versionWithFlags() const { return options().version; }
|
|
|
|
PropertyName* currentName() const {
|
|
if (isCurrentTokenType(TOK_YIELD))
|
|
return cx->names().yield;
|
|
MOZ_ASSERT(isCurrentTokenType(TOK_NAME));
|
|
return currentToken().name();
|
|
}
|
|
|
|
PropertyName* nextName() const {
|
|
if (nextToken().type == TOK_YIELD)
|
|
return cx->names().yield;
|
|
MOZ_ASSERT(nextToken().type == TOK_NAME);
|
|
return nextToken().name();
|
|
}
|
|
|
|
bool isCurrentTokenAssignment() const {
|
|
return TokenKindIsAssignment(currentToken().type);
|
|
}
|
|
|
|
// Flag methods.
|
|
bool isEOF() const { return flags.isEOF; }
|
|
bool sawOctalEscape() const { return flags.sawOctalEscape; }
|
|
bool hadError() const { return flags.hadError; }
|
|
|
|
// TokenStream-specific error reporters.
|
|
bool reportError(unsigned errorNumber, ...);
|
|
bool reportErrorNoOffset(unsigned errorNumber, ...);
|
|
bool reportWarning(unsigned errorNumber, ...);
|
|
|
|
static const uint32_t NoOffset = UINT32_MAX;
|
|
|
|
// General-purpose error reporters. You should avoid calling these
|
|
// directly, and instead use the more succinct alternatives (e.g.
|
|
// reportError()) in TokenStream, Parser, and BytecodeEmitter.
|
|
bool reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,
|
|
va_list args);
|
|
bool reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,
|
|
va_list args);
|
|
bool reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber,
|
|
va_list args);
|
|
|
|
// asm.js reporter
|
|
void reportAsmJSError(uint32_t offset, unsigned errorNumber, ...);
|
|
|
|
JSAtom* getRawTemplateStringAtom() {
|
|
MOZ_ASSERT(currentToken().type == TOK_TEMPLATE_HEAD ||
|
|
currentToken().type == TOK_NO_SUBS_TEMPLATE);
|
|
const char16_t* cur = userbuf.rawCharPtrAt(currentToken().pos.begin + 1);
|
|
const char16_t* end;
|
|
if (currentToken().type == TOK_TEMPLATE_HEAD) {
|
|
// Of the form |`...${| or |}...${|
|
|
end = userbuf.rawCharPtrAt(currentToken().pos.end - 2);
|
|
} else {
|
|
// NO_SUBS_TEMPLATE is of the form |`...`| or |}...`|
|
|
end = userbuf.rawCharPtrAt(currentToken().pos.end - 1);
|
|
}
|
|
|
|
CharBuffer charbuf(cx);
|
|
while (cur < end) {
|
|
int32_t ch = *cur;
|
|
if (ch == '\r') {
|
|
ch = '\n';
|
|
if ((cur + 1 < end) && (*(cur + 1) == '\n'))
|
|
cur++;
|
|
}
|
|
if (!charbuf.append(ch))
|
|
return nullptr;
|
|
cur++;
|
|
}
|
|
return AtomizeChars(cx, charbuf.begin(), charbuf.length());
|
|
}
|
|
|
|
private:
|
|
// These are private because they should only be called by the tokenizer
|
|
// while tokenizing not by, for example, BytecodeEmitter.
|
|
bool reportStrictModeError(unsigned errorNumber, ...);
|
|
bool strictMode() const { return strictModeGetter && strictModeGetter->strictMode(); }
|
|
|
|
static JSAtom* atomize(ExclusiveContext* cx, CharBuffer& cb);
|
|
bool putIdentInTokenbuf(const char16_t* identStart);
|
|
|
|
struct Flags
|
|
{
|
|
bool isEOF:1; // Hit end of file.
|
|
bool isDirtyLine:1; // Non-whitespace since start of line.
|
|
bool sawOctalEscape:1; // Saw an octal character escape.
|
|
bool hadError:1; // Hit a syntax error, at start or during a
|
|
// token.
|
|
bool hitOOM:1; // Hit OOM.
|
|
|
|
Flags()
|
|
: isEOF(), isDirtyLine(), sawOctalEscape(), hadError(), hitOOM()
|
|
{}
|
|
};
|
|
|
|
bool awaitIsKeyword = false;
|
|
friend class AutoAwaitIsKeyword;
|
|
|
|
public:
|
|
typedef Token::Modifier Modifier;
|
|
static MOZ_CONSTEXPR_VAR Modifier None = Token::None;
|
|
static MOZ_CONSTEXPR_VAR Modifier Operand = Token::Operand;
|
|
static MOZ_CONSTEXPR_VAR Modifier KeywordIsName = Token::KeywordIsName;
|
|
static MOZ_CONSTEXPR_VAR Modifier TemplateTail = Token::TemplateTail;
|
|
|
|
typedef Token::ModifierException ModifierException;
|
|
static MOZ_CONSTEXPR_VAR ModifierException NoException = Token::NoException;
|
|
static MOZ_CONSTEXPR_VAR ModifierException NoneIsOperand = Token::NoneIsOperand;
|
|
static MOZ_CONSTEXPR_VAR ModifierException OperandIsNone = Token::OperandIsNone;
|
|
static MOZ_CONSTEXPR_VAR ModifierException NoneIsKeywordIsName = Token::NoneIsKeywordIsName;
|
|
|
|
void addModifierException(ModifierException modifierException) {
|
|
#ifdef DEBUG
|
|
const Token& next = nextToken();
|
|
if (next.modifierException == NoneIsOperand)
|
|
{
|
|
// Token after yield expression without operand already has
|
|
// NoneIsOperand exception.
|
|
MOZ_ASSERT(modifierException == OperandIsNone);
|
|
MOZ_ASSERT(next.type != TOK_DIV,
|
|
"next token requires contextual specifier to be parsed unambiguously");
|
|
|
|
// Do not update modifierException.
|
|
return;
|
|
}
|
|
|
|
MOZ_ASSERT(next.modifierException == NoException);
|
|
switch (modifierException) {
|
|
case NoneIsOperand:
|
|
MOZ_ASSERT(next.modifier == Operand);
|
|
MOZ_ASSERT(next.type != TOK_DIV,
|
|
"next token requires contextual specifier to be parsed unambiguously");
|
|
break;
|
|
case OperandIsNone:
|
|
MOZ_ASSERT(next.modifier == None);
|
|
MOZ_ASSERT(next.type != TOK_DIV && next.type != TOK_REGEXP,
|
|
"next token requires contextual specifier to be parsed unambiguously");
|
|
break;
|
|
case NoneIsKeywordIsName:
|
|
MOZ_ASSERT(next.modifier == KeywordIsName);
|
|
MOZ_ASSERT(next.type != TOK_NAME);
|
|
break;
|
|
default:
|
|
MOZ_CRASH("unexpected modifier exception");
|
|
}
|
|
tokens[(cursor + 1) & ntokensMask].modifierException = modifierException;
|
|
#endif
|
|
}
|
|
|
|
void
|
|
verifyConsistentModifier(Modifier modifier, Token lookaheadToken) {
|
|
#ifdef DEBUG
|
|
// Easy case: modifiers match.
|
|
if (modifier == lookaheadToken.modifier)
|
|
return;
|
|
|
|
if (lookaheadToken.modifierException == OperandIsNone) {
|
|
// getToken(Operand) permissibly following getToken().
|
|
if (modifier == Operand && lookaheadToken.modifier == None)
|
|
return;
|
|
}
|
|
|
|
if (lookaheadToken.modifierException == NoneIsOperand) {
|
|
// getToken() permissibly following getToken(Operand).
|
|
if (modifier == None && lookaheadToken.modifier == Operand)
|
|
return;
|
|
}
|
|
|
|
if (lookaheadToken.modifierException == NoneIsKeywordIsName) {
|
|
// getToken() permissibly following getToken(KeywordIsName).
|
|
if (modifier == None && lookaheadToken.modifier == KeywordIsName)
|
|
return;
|
|
}
|
|
|
|
MOZ_ASSERT_UNREACHABLE("this token was previously looked up with a "
|
|
"different modifier, potentially making "
|
|
"tokenization non-deterministic");
|
|
#endif
|
|
}
|
|
|
|
// Advance to the next token. If the token stream encountered an error,
|
|
// return false. Otherwise return true and store the token kind in |*ttp|.
|
|
bool getToken(TokenKind* ttp, Modifier modifier = None) {
|
|
// Check for a pushed-back token resulting from mismatching lookahead.
|
|
if (lookahead != 0) {
|
|
MOZ_ASSERT(!flags.hadError);
|
|
lookahead--;
|
|
cursor = (cursor + 1) & ntokensMask;
|
|
TokenKind tt = currentToken().type;
|
|
MOZ_ASSERT(tt != TOK_EOL);
|
|
verifyConsistentModifier(modifier, currentToken());
|
|
*ttp = tt;
|
|
return true;
|
|
}
|
|
|
|
return getTokenInternal(ttp, modifier);
|
|
}
|
|
|
|
// Push the last scanned token back into the stream.
|
|
void ungetToken() {
|
|
MOZ_ASSERT(lookahead < maxLookahead);
|
|
lookahead++;
|
|
cursor = (cursor - 1) & ntokensMask;
|
|
}
|
|
|
|
bool peekToken(TokenKind* ttp, Modifier modifier = None) {
|
|
if (lookahead > 0) {
|
|
MOZ_ASSERT(!flags.hadError);
|
|
verifyConsistentModifier(modifier, nextToken());
|
|
*ttp = nextToken().type;
|
|
return true;
|
|
}
|
|
if (!getTokenInternal(ttp, modifier))
|
|
return false;
|
|
ungetToken();
|
|
return true;
|
|
}
|
|
|
|
bool peekTokenPos(TokenPos* posp, Modifier modifier = None) {
|
|
if (lookahead == 0) {
|
|
TokenKind tt;
|
|
if (!getTokenInternal(&tt, modifier))
|
|
return false;
|
|
ungetToken();
|
|
MOZ_ASSERT(hasLookahead());
|
|
} else {
|
|
MOZ_ASSERT(!flags.hadError);
|
|
verifyConsistentModifier(modifier, nextToken());
|
|
}
|
|
*posp = nextToken().pos;
|
|
return true;
|
|
}
|
|
|
|
// This is like peekToken(), with one exception: if there is an EOL
|
|
// between the end of the current token and the start of the next token, it
|
|
// return true and store TOK_EOL in |*ttp|. In that case, no token with
|
|
// TOK_EOL is actually created, just a TOK_EOL TokenKind is returned, and
|
|
// currentToken() shouldn't be consulted. (This is the only place TOK_EOL
|
|
// is produced.)
|
|
MOZ_ALWAYS_INLINE bool
|
|
peekTokenSameLine(TokenKind* ttp, Modifier modifier = None) {
|
|
const Token& curr = currentToken();
|
|
|
|
// If lookahead != 0, we have scanned ahead at least one token, and
|
|
// |lineno| is the line that the furthest-scanned token ends on. If
|
|
// it's the same as the line that the current token ends on, that's a
|
|
// stronger condition than what we are looking for, and we don't need
|
|
// to return TOK_EOL.
|
|
if (lookahead != 0) {
|
|
bool onThisLine;
|
|
if (!srcCoords.isOnThisLine(curr.pos.end, lineno, &onThisLine))
|
|
return reportError(JSMSG_OUT_OF_MEMORY);
|
|
if (onThisLine) {
|
|
MOZ_ASSERT(!flags.hadError);
|
|
verifyConsistentModifier(modifier, nextToken());
|
|
*ttp = nextToken().type;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// The above check misses two cases where we don't have to return
|
|
// TOK_EOL.
|
|
// - The next token starts on the same line, but is a multi-line token.
|
|
// - The next token starts on the same line, but lookahead==2 and there
|
|
// is a newline between the next token and the one after that.
|
|
// The following test is somewhat expensive but gets these cases (and
|
|
// all others) right.
|
|
TokenKind tmp;
|
|
if (!getToken(&tmp, modifier))
|
|
return false;
|
|
const Token& next = currentToken();
|
|
ungetToken();
|
|
|
|
*ttp = srcCoords.lineNum(curr.pos.end) == srcCoords.lineNum(next.pos.begin)
|
|
? next.type
|
|
: TOK_EOL;
|
|
return true;
|
|
}
|
|
|
|
// Get the next token from the stream if its kind is |tt|.
|
|
bool matchToken(bool* matchedp, TokenKind tt, Modifier modifier = None) {
|
|
TokenKind token;
|
|
if (!getToken(&token, modifier))
|
|
return false;
|
|
if (token == tt) {
|
|
*matchedp = true;
|
|
} else {
|
|
ungetToken();
|
|
*matchedp = false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void consumeKnownToken(TokenKind tt, Modifier modifier = None) {
|
|
bool matched;
|
|
MOZ_ASSERT(hasLookahead());
|
|
MOZ_ALWAYS_TRUE(matchToken(&matched, tt, modifier));
|
|
MOZ_ALWAYS_TRUE(matched);
|
|
}
|
|
|
|
// Like matchToken(..., TOK_NAME) but further matching the name token only
|
|
// if it has the given characters, without containing escape sequences.
|
|
// If the name token has the given characters yet *does* contain an escape,
|
|
// a syntax error will be reported.
|
|
//
|
|
// This latter behavior makes this method unsuitable for use in any context
|
|
// where ASI might occur. In such places, an escaped "contextual keyword"
|
|
// on a new line is the start of an ExpressionStatement, not a continuation
|
|
// of a StatementListItem (or ImportDeclaration or ExportDeclaration, in
|
|
// modules).
|
|
bool matchContextualKeyword(bool* matchedp, Handle<PropertyName*> keyword,
|
|
Modifier modifier = None)
|
|
{
|
|
TokenKind token;
|
|
if (!getToken(&token, modifier))
|
|
return false;
|
|
if (token == TOK_NAME && currentToken().name() == keyword) {
|
|
if (currentToken().nameContainsEscape()) {
|
|
reportError(JSMSG_ESCAPED_KEYWORD);
|
|
return false;
|
|
}
|
|
|
|
*matchedp = true;
|
|
} else {
|
|
*matchedp = false;
|
|
ungetToken();
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool nextTokenEndsExpr(bool* endsExpr) {
|
|
TokenKind tt;
|
|
if (!peekToken(&tt))
|
|
return false;
|
|
*endsExpr = isExprEnding[tt];
|
|
return true;
|
|
}
|
|
|
|
class MOZ_STACK_CLASS Position {
|
|
public:
|
|
// The Token fields may contain pointers to atoms, so for correct
|
|
// rooting we must ensure collection of atoms is disabled while objects
|
|
// of this class are live. Do this by requiring a dummy AutoKeepAtoms
|
|
// reference in the constructor.
|
|
//
|
|
// This class is explicity ignored by the analysis, so don't add any
|
|
// more pointers to GC things here!
|
|
explicit Position(AutoKeepAtoms&) { }
|
|
private:
|
|
Position(const Position&) = delete;
|
|
friend class TokenStream;
|
|
const char16_t* buf;
|
|
Flags flags;
|
|
unsigned lineno;
|
|
size_t linebase;
|
|
size_t prevLinebase;
|
|
Token currentToken;
|
|
unsigned lookahead;
|
|
Token lookaheadTokens[maxLookahead];
|
|
};
|
|
|
|
bool advance(size_t position);
|
|
void tell(Position*);
|
|
void seek(const Position& pos);
|
|
bool seek(const Position& pos, const TokenStream& other);
|
|
#ifdef DEBUG
|
|
inline bool debugHasNoLookahead() const {
|
|
return lookahead == 0;
|
|
}
|
|
#endif
|
|
|
|
const char16_t* rawCharPtrAt(size_t offset) const {
|
|
return userbuf.rawCharPtrAt(offset);
|
|
}
|
|
|
|
const char16_t* rawLimit() const {
|
|
return userbuf.limit();
|
|
}
|
|
|
|
bool hasDisplayURL() const {
|
|
return displayURL_ != nullptr;
|
|
}
|
|
|
|
char16_t* displayURL() {
|
|
return displayURL_.get();
|
|
}
|
|
|
|
bool hasSourceMapURL() const {
|
|
return sourceMapURL_ != nullptr;
|
|
}
|
|
|
|
char16_t* sourceMapURL() {
|
|
return sourceMapURL_.get();
|
|
}
|
|
|
|
// If |atom| is not a keyword in this version, return true with *ttp
|
|
// unchanged.
|
|
//
|
|
// If it is a reserved word in this version and strictness mode, and thus
|
|
// can't be present in correct code, report a SyntaxError and return false.
|
|
//
|
|
// If it is a keyword, like "if", the behavior depends on ttp. If ttp is
|
|
// null, report a SyntaxError ("if is a reserved identifier") and return
|
|
// false. If ttp is non-null, return true with the keyword's TokenKind in
|
|
// *ttp.
|
|
bool checkForKeyword(JSAtom* atom, TokenKind* ttp);
|
|
|
|
// Same semantics as above, but for the provided keyword.
|
|
bool checkForKeyword(const KeywordInfo* kw, TokenKind* ttp);
|
|
|
|
// This class maps a userbuf offset (which is 0-indexed) to a line number
|
|
// (which is 1-indexed) and a column index (which is 0-indexed).
|
|
class SourceCoords
|
|
{
|
|
// For a given buffer holding source code, |lineStartOffsets_| has one
|
|
// element per line of source code, plus one sentinel element. Each
|
|
// non-sentinel element holds the buffer offset for the start of the
|
|
// corresponding line of source code. For this example script:
|
|
//
|
|
// 1 // xyz [line starts at offset 0]
|
|
// 2 var x; [line starts at offset 7]
|
|
// 3 [line starts at offset 14]
|
|
// 4 var y; [line starts at offset 15]
|
|
//
|
|
// |lineStartOffsets_| is:
|
|
//
|
|
// [0, 7, 14, 15, MAX_PTR]
|
|
//
|
|
// To convert a "line number" to a "line index" (i.e. an index into
|
|
// |lineStartOffsets_|), subtract |initialLineNum_|. E.g. line 3's
|
|
// line index is (3 - initialLineNum_), which is 2. Therefore
|
|
// lineStartOffsets_[2] holds the buffer offset for the start of line 3,
|
|
// which is 14. (Note that |initialLineNum_| is often 1, but not
|
|
// always.)
|
|
//
|
|
// The first element is always 0, and the last element is always the
|
|
// MAX_PTR sentinel.
|
|
//
|
|
// offset-to-line/column lookups are O(log n) in the worst case (binary
|
|
// search), but in practice they're heavily clustered and we do better
|
|
// than that by using the previous lookup's result (lastLineIndex_) as
|
|
// a starting point.
|
|
//
|
|
// Checking if an offset lies within a particular line number
|
|
// (isOnThisLine()) is O(1).
|
|
//
|
|
Vector<uint32_t, 128> lineStartOffsets_;
|
|
uint32_t initialLineNum_;
|
|
|
|
// This is mutable because it's modified on every search, but that fact
|
|
// isn't visible outside this class.
|
|
mutable uint32_t lastLineIndex_;
|
|
|
|
uint32_t lineIndexOf(uint32_t offset) const;
|
|
|
|
static const uint32_t MAX_PTR = UINT32_MAX;
|
|
|
|
uint32_t lineIndexToNum(uint32_t lineIndex) const { return lineIndex + initialLineNum_; }
|
|
uint32_t lineNumToIndex(uint32_t lineNum) const { return lineNum - initialLineNum_; }
|
|
|
|
public:
|
|
SourceCoords(ExclusiveContext* cx, uint32_t ln);
|
|
|
|
bool add(uint32_t lineNum, uint32_t lineStartOffset);
|
|
bool fill(const SourceCoords& other);
|
|
|
|
bool isOnThisLine(uint32_t offset, uint32_t lineNum, bool* onThisLine) const {
|
|
uint32_t lineIndex = lineNumToIndex(lineNum);
|
|
if (lineIndex + 1 >= lineStartOffsets_.length()) // +1 due to sentinel
|
|
return false;
|
|
*onThisLine = lineStartOffsets_[lineIndex] <= offset &&
|
|
offset < lineStartOffsets_[lineIndex + 1];
|
|
return true;
|
|
}
|
|
|
|
uint32_t lineNum(uint32_t offset) const;
|
|
uint32_t columnIndex(uint32_t offset) const;
|
|
void lineNumAndColumnIndex(uint32_t offset, uint32_t* lineNum, uint32_t* columnIndex) const;
|
|
};
|
|
|
|
SourceCoords srcCoords;
|
|
|
|
JSAtomState& names() const {
|
|
return cx->names();
|
|
}
|
|
|
|
ExclusiveContext* context() const {
|
|
return cx;
|
|
}
|
|
|
|
const ReadOnlyCompileOptions& options() const {
|
|
return options_;
|
|
}
|
|
|
|
private:
|
|
// This is the low-level interface to the JS source code buffer. It just
|
|
// gets raw chars, basically. TokenStreams functions are layered on top
|
|
// and do some extra stuff like converting all EOL sequences to '\n',
|
|
// tracking the line number, and setting |flags.isEOF|. (The "raw" in "raw
|
|
// chars" refers to the lack of EOL sequence normalization.)
|
|
//
|
|
// buf[0..length-1] often represents a substring of some larger source,
|
|
// where we have only the substring in memory. The |startOffset| argument
|
|
// indicates the offset within this larger string at which our string
|
|
// begins, the offset of |buf[0]|.
|
|
class TokenBuf {
|
|
public:
|
|
TokenBuf(ExclusiveContext* cx, const char16_t* buf, size_t length, size_t startOffset)
|
|
: base_(buf),
|
|
startOffset_(startOffset),
|
|
limit_(buf + length),
|
|
ptr(buf)
|
|
{ }
|
|
|
|
bool hasRawChars() const {
|
|
return ptr < limit_;
|
|
}
|
|
|
|
bool atStart() const {
|
|
return offset() == 0;
|
|
}
|
|
|
|
size_t startOffset() const {
|
|
return startOffset_;
|
|
}
|
|
|
|
size_t offset() const {
|
|
return startOffset_ + mozilla::PointerRangeSize(base_, ptr);
|
|
}
|
|
|
|
const char16_t* rawCharPtrAt(size_t offset) const {
|
|
MOZ_ASSERT(startOffset_ <= offset);
|
|
MOZ_ASSERT(offset - startOffset_ <= mozilla::PointerRangeSize(base_, limit_));
|
|
return base_ + (offset - startOffset_);
|
|
}
|
|
|
|
const char16_t* limit() const {
|
|
return limit_;
|
|
}
|
|
|
|
char16_t getRawChar() {
|
|
return *ptr++; // this will nullptr-crash if poisoned
|
|
}
|
|
|
|
char16_t peekRawChar() const {
|
|
return *ptr; // this will nullptr-crash if poisoned
|
|
}
|
|
|
|
bool matchRawChar(char16_t c) {
|
|
if (*ptr == c) { // this will nullptr-crash if poisoned
|
|
ptr++;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool matchRawCharBackwards(char16_t c) {
|
|
MOZ_ASSERT(ptr); // make sure it hasn't been poisoned
|
|
if (*(ptr - 1) == c) {
|
|
ptr--;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
void ungetRawChar() {
|
|
MOZ_ASSERT(ptr); // make sure it hasn't been poisoned
|
|
ptr--;
|
|
}
|
|
|
|
const char16_t* addressOfNextRawChar(bool allowPoisoned = false) const {
|
|
MOZ_ASSERT_IF(!allowPoisoned, ptr); // make sure it hasn't been poisoned
|
|
return ptr;
|
|
}
|
|
|
|
// Use this with caution!
|
|
void setAddressOfNextRawChar(const char16_t* a, bool allowPoisoned = false) {
|
|
MOZ_ASSERT_IF(!allowPoisoned, a);
|
|
ptr = a;
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
// Poison the TokenBuf so it cannot be accessed again.
|
|
void poison() {
|
|
ptr = nullptr;
|
|
}
|
|
#endif
|
|
|
|
static bool isRawEOLChar(int32_t c) {
|
|
return c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR;
|
|
}
|
|
|
|
// Returns the offset of the next EOL, but stops once 'max' characters
|
|
// have been scanned (*including* the char at startOffset_).
|
|
size_t findEOLMax(size_t start, size_t max);
|
|
|
|
private:
|
|
const char16_t* base_; // base of buffer
|
|
uint32_t startOffset_; // offset of base_[0]
|
|
const char16_t* limit_; // limit for quick bounds check
|
|
const char16_t* ptr; // next char to get
|
|
};
|
|
|
|
bool getTokenInternal(TokenKind* ttp, Modifier modifier);
|
|
|
|
bool getBracedUnicode(uint32_t* code);
|
|
bool getStringOrTemplateToken(int untilChar, Token** tp);
|
|
|
|
int32_t getChar();
|
|
int32_t getCharIgnoreEOL();
|
|
void ungetChar(int32_t c);
|
|
void ungetCharIgnoreEOL(int32_t c);
|
|
Token* newToken(ptrdiff_t adjust);
|
|
bool peekUnicodeEscape(int32_t* c);
|
|
bool matchUnicodeEscapeIdStart(int32_t* c);
|
|
bool matchUnicodeEscapeIdent(int32_t* c);
|
|
bool peekChars(int n, char16_t* cp);
|
|
|
|
bool getDirectives(bool isMultiline, bool shouldWarnDeprecated);
|
|
bool getDirective(bool isMultiline, bool shouldWarnDeprecated,
|
|
const char* directive, int directiveLength,
|
|
const char* errorMsgPragma,
|
|
mozilla::UniquePtr<char16_t[], JS::FreePolicy>* destination);
|
|
bool getDisplayURL(bool isMultiline, bool shouldWarnDeprecated);
|
|
bool getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated);
|
|
|
|
// |expect| cannot be an EOL char.
|
|
bool matchChar(int32_t expect) {
|
|
MOZ_ASSERT(!TokenBuf::isRawEOLChar(expect));
|
|
return MOZ_LIKELY(userbuf.hasRawChars()) &&
|
|
userbuf.matchRawChar(expect);
|
|
}
|
|
|
|
void consumeKnownChar(int32_t expect) {
|
|
mozilla::DebugOnly<int32_t> c = getChar();
|
|
MOZ_ASSERT(c == expect);
|
|
}
|
|
|
|
int32_t peekChar() {
|
|
int32_t c = getChar();
|
|
ungetChar(c);
|
|
return c;
|
|
}
|
|
|
|
void skipChars(int n) {
|
|
while (--n >= 0)
|
|
getChar();
|
|
}
|
|
|
|
void updateLineInfoForEOL();
|
|
void updateFlagsForEOL();
|
|
|
|
const Token& nextToken() const {
|
|
MOZ_ASSERT(hasLookahead());
|
|
return tokens[(cursor + 1) & ntokensMask];
|
|
}
|
|
|
|
bool hasLookahead() const { return lookahead > 0; }
|
|
|
|
// Options used for parsing/tokenizing.
|
|
const ReadOnlyCompileOptions& options_;
|
|
|
|
Token tokens[ntokens]; // circular token buffer
|
|
unsigned cursor; // index of last parsed token
|
|
unsigned lookahead; // count of lookahead tokens
|
|
unsigned lineno; // current line number
|
|
Flags flags; // flags -- see above
|
|
size_t linebase; // start of current line
|
|
size_t prevLinebase; // start of previous line; size_t(-1) if on the first line
|
|
TokenBuf userbuf; // user input buffer
|
|
const char* filename; // input filename or null
|
|
mozilla::UniquePtr<char16_t[], JS::FreePolicy> displayURL_; // the user's requested source URL or null
|
|
mozilla::UniquePtr<char16_t[], JS::FreePolicy> sourceMapURL_; // source map's filename or null
|
|
CharBuffer tokenbuf; // current token string buffer
|
|
uint8_t isExprEnding[TOK_LIMIT];// which tokens definitely terminate exprs?
|
|
ExclusiveContext* const cx;
|
|
bool mutedErrors;
|
|
StrictModeGetter* strictModeGetter; // used to test for strict mode
|
|
};
|
|
|
|
class MOZ_STACK_CLASS AutoAwaitIsKeyword
|
|
{
|
|
private:
|
|
TokenStream* ts_;
|
|
bool oldAwaitIsKeyword_;
|
|
|
|
public:
|
|
AutoAwaitIsKeyword(TokenStream* ts, bool awaitIsKeyword) {
|
|
ts_ = ts;
|
|
oldAwaitIsKeyword_ = ts_->awaitIsKeyword;
|
|
ts_->awaitIsKeyword = awaitIsKeyword;
|
|
}
|
|
|
|
~AutoAwaitIsKeyword() {
|
|
ts_->awaitIsKeyword = oldAwaitIsKeyword_;
|
|
ts_ = nullptr;
|
|
}
|
|
};
|
|
|
|
// Steal one JSREPORT_* bit (see jsapi.h) to tell that arguments to the error
|
|
// message have const char16_t* type, not const char*.
|
|
#define JSREPORT_UC 0x100
|
|
|
|
extern const char*
|
|
TokenKindToDesc(TokenKind tt);
|
|
|
|
} // namespace frontend
|
|
} // namespace js
|
|
|
|
extern JS_FRIEND_API(int)
|
|
js_fgets(char* buf, int size, FILE* file);
|
|
|
|
#ifdef DEBUG
|
|
extern const char*
|
|
TokenKindToString(js::frontend::TokenKind tt);
|
|
#endif
|
|
|
|
#endif /* frontend_TokenStream_h */
|