tenfourfox/dom/security/nsCSPParser.h

256 lines
8.7 KiB
C++

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsCSPParser_h___
#define nsCSPParser_h___
#include "nsCSPUtils.h"
#include "nsIURI.h"
#include "nsString.h"
/**
* How does the parsing work?
*
* We generate tokens by splitting the policy-string by whitespace and semicolon.
* Internally the tokens are represented as an array of string-arrays:
*
* [
* [ name, src, src, src, ... ],
* [ name, src, src, src, ... ],
* [ name, src, src, src, ... ]
* ]
*
* for example:
* [
* [ img-src, http://www.example.com, http://www.test.com ],
* [ default-src, 'self'],
* [ script-src, 'unsafe-eval', 'unsafe-inline' ],
* ]
*
* The first element of each array has to be a valid directive-name, otherwise we can
* ignore the remaining elements of the array. Also, if the
* directive already exists in the current policy, we can ignore
* the remaining elements of that array. (http://www.w3.org/TR/CSP/#parsing)
*/
// Tokenized policy: an array of directives, where each directive is itself an
// array of strings of the form [ name, src, src, ... ].
using cspTokens = nsTArray<nsTArray<nsString>>;
/**
 * Tokenizer for a CSP policy string.
 *
 * The only public entry point is the static tokenizeCSPPolicy(); the
 * constructor and destructor are private. The private helpers walk a
 * [mCurChar, mEndChar) character range and collect the characters of the
 * token currently being built in mCurToken.
 */
class nsCSPTokenizer {
public:
  /**
   * Tokenizes aPolicyString and reports the result through outTokens.
   *
   * @param aPolicyString the policy string to split into tokens
   * @param outTokens     out-param receiving the tokens, grouped per directive
   */
  static void tokenizeCSPPolicy(const nsAString &aPolicyString, cspTokens& outTokens);

private:
  // NOTE(review): presumably aStart/aEnd initialize mCurChar/mEndChar; the
  // definition lives outside this header - confirm there.
  nsCSPTokenizer(const char16_t* aStart, const char16_t* aEnd);
  ~nsCSPTokenizer();

  // Returns true once the cursor has reached (or passed) the end of the input.
  inline bool atEnd() const
  {
    return mCurChar >= mEndChar;
  }

  // Moves the cursor past any run of ' ' characters and leaves mCurToken empty.
  inline void skipWhiteSpace()
  {
    // The skipped characters are never part of a token, so there is no need
    // to collect them in mCurToken before clearing it.
    while (mCurChar < mEndChar && *mCurChar == ' ') {
      ++mCurChar;
    }
    mCurToken.Truncate();
  }

  // Moves the cursor past any run of ' ' and ';' characters and leaves
  // mCurToken empty.
  inline void skipWhiteSpaceAndSemicolon()
  {
    while (mCurChar < mEndChar && (*mCurChar == ' ' || *mCurChar == ';')) {
      ++mCurChar;
    }
    mCurToken.Truncate();
  }

  /**
   * Consumes the current character if it equals aChar.
   *
   * On a match the character is appended to mCurToken and the cursor advances.
   *
   * @param aChar the character expected at the cursor
   * @return true if the character was consumed; false if it did not match or
   *         if the cursor is already at the end of the input
   */
  inline bool accept(char16_t aChar)
  {
    NS_ASSERTION(mCurChar < mEndChar, "Trying to dereference mEndChar");
    // Never read *mEndChar, even when the assertion above does not stop
    // execution; this mirrors the atEnd() guard in nsCSPParser::accept().
    if (atEnd()) {
      return false;
    }
    if (*mCurChar == aChar) {
      mCurToken.Append(*mCurChar++);
      return true;
    }
    return false;
  }

  void generateNextToken();
  void generateTokens(cspTokens& outTokens);

  const char16_t* mCurChar;   // cursor: next character to be examined
  const char16_t* mEndChar;   // end of the input; never dereferenced
  nsString        mCurToken;  // characters collected for the current token
};
class nsCSPParser {
public:
  /**
   * parseContentSecurityPolicy is the single public entry point of the CSP
   * parser. The input string is first split into string tokens; policy() is
   * then invoked to begin the actual parse. From there the parser calls one
   * function after the other, following the source-list grammar from
   * http://www.w3.org/TR/CSP11/#source-list. For instance, port() can only be
   * called once host() has already processed any possible host, much like the
   * transitions of a finite state machine.
   */
  static nsCSPPolicy* parseContentSecurityPolicy(const nsAString &aPolicyString,
                                                 nsIURI *aSelfURI,
                                                 bool aReportOnly,
                                                 nsCSPContext* aCSPContext,
                                                 bool aDeliveredViaMetaTag);

private:
  nsCSPParser(cspTokens& aTokens,
              nsIURI* aSelfURI,
              nsCSPContext* aCSPContext,
              bool aDeliveredViaMetaTag);
  ~nsCSPParser();

  // Grammar productions; the CSP is parsed using the source-list from
  // http://www.w3.org/TR/CSP11/#source-list
  nsCSPPolicy*    policy();
  void            directive();
  nsCSPDirective* directiveName();
  void            directiveValue(nsTArray<nsCSPBaseSrc*>& outSrcs);
  void            referrerDirectiveValue();
  void            sourceList(nsTArray<nsCSPBaseSrc*>& outSrcs);
  nsCSPBaseSrc*   sourceExpression();
  nsCSPSchemeSrc* schemeSource();
  nsCSPHostSrc*   hostSource();
  nsCSPBaseSrc*   keywordSource();
  nsCSPNonceSrc*  nonceSource();
  nsCSPHashSrc*   hashSource();
  nsCSPHostSrc*   appHost();                 // helper: support for app specific hosts
  nsCSPHostSrc*   host();
  bool            hostChar();
  bool            schemeChar();
  bool            port();
  bool            path(nsCSPHostSrc* aCspHost);

  bool subHost();                                        // helper: parses subDomains
  bool atValidUnreservedChar();                          // helper: parses unreserved
  bool atValidSubDelimChar();                            // helper: parses sub-delims
  bool atValidPctEncodedChar();                          // helper: parses pct-encoded
  bool subPath(nsCSPHostSrc* aCspHost);                  // helper: parses paths
  void reportURIList(nsTArray<nsCSPBaseSrc*>& outSrcs);  // helper: parses report-uris
  void percentDecodeStr(const nsAString& aEncStr,        // helper: percent-decodes a string
                        nsAString& outDecStr);

  // True when the cursor has reached (or passed) the end of the current token.
  bool atEnd()
  {
    return !(mCurChar < mEndChar);
  }

  // Consumes the character at the cursor if it equals aSymbol (see advance()).
  // Returns false, consuming nothing, at the end of the token or on a mismatch.
  bool accept(char16_t aSymbol)
  {
    return !atEnd() && *mCurChar == aSymbol && advance();
  }

  // Consumes the character at the cursor if aClassifier returns true for it.
  // Returns false, consuming nothing, at the end of the token or on rejection.
  bool accept(bool (*aClassifier) (char16_t))
  {
    return !atEnd() && aClassifier(*mCurChar) && advance();
  }

  // Tests, without consuming anything, whether the character at the cursor
  // equals aSymbol; always false at the end of the token.
  bool peek(char16_t aSymbol)
  {
    return !atEnd() && *mCurChar == aSymbol;
  }

  // Tests, without consuming anything, whether aClassifier returns true for
  // the character at the cursor; always false at the end of the token.
  bool peek(bool (*aClassifier) (char16_t))
  {
    return !atEnd() && aClassifier(*mCurChar);
  }

  // Appends the character at the cursor to mCurValue and moves the cursor one
  // character forward. Does nothing and returns false at the end of the token.
  bool advance()
  {
    const bool hasMore = !atEnd();
    if (hasMore) {
      mCurValue.Append(*mCurChar);
      ++mCurChar;
    }
    return hasMore;
  }

  // Discards everything collected in mCurValue so far.
  void resetCurValue()
  {
    mCurValue.Truncate();
  }

  bool atEndOfPath();
  bool atValidPathChar();

  void resetCurChar(const nsAString& aToken);

  void logWarningErrorToConsole(uint32_t aSeverityFlag,
                                const char* aProperty,
                                const char16_t* aParams[],
                                uint32_t aParamsLength);

  /**
   * While a policy is being parsed, the parser relies on the helper members
   * below, which are used and reset as parsing proceeds. The example that
   * follows illustrates their roles.
   *
   * The tokenizer separates all input into arrays of arrays of strings, which
   * are stored in mTokens, for example:
   *   mTokens = [ [ script-src, http://www.example.com, 'self' ], ... ]
   *
   * When parsing starts, mCurDir always holds the array of strings that is
   * currently being processed. In our example:
   *   mCurDir = [ script-src, http://www.example.com, 'self' ]
   *
   * During parsing, one string of that array is processed/consumed at a time.
   * mCurToken is set to the string currently being processed; in the first
   * case that would be:
   *   mCurToken = script-src
   * which allows simple string comparisons to decide whether mCurToken is a
   * valid directive.
   *
   * Continuing, the parser consumes the next string of that array, resetting:
   *   mCurToken = "http://www.example.com"
   *                ^                     ^
   *                mCurChar              mEndChar (points *after* the 'm')
   *   mCurValue = ""
   *
   * After the first call to advance(), the helpers hold the following values:
   *   mCurToken = "http://www.example.com"
   *                 ^                    ^
   *                 mCurChar             mEndChar (points *after* the 'm')
   *   mCurValue = "h"
   *
   * Parsing continues until all strings of one directive are consumed; then
   * mCurDir is reset to hold the next array of strings and the process starts
   * all over.
   */
  const char16_t*    mCurChar;
  const char16_t*    mEndChar;
  nsString           mCurValue;
  nsString           mCurToken;
  nsTArray<nsString> mCurDir;

  // Cached state used to ignore unsafe-inline if a hash or nonce is specified.
  bool               mHasHashOrNonce;         // false, if no hash or nonce is defined
  nsCSPKeywordSrc*   mUnsafeInlineKeywordSrc; // null, otherwise invalidate()

  // Cached state for handling the child-src and frame-src directives.
  // frame-src is deprecated in favor of child-src; however, if a frame-src
  // directive is seen, it takes precedence for frames and iframes. At the end
  // of parsing, if there is a child-src directive, we need to decide whether
  // it will handle frames, or whether there is a frame-src that should be
  // honored instead.
  nsCSPChildSrcDirective* mChildSrc;
  nsCSPDirective*         mFrameSrc;

  cspTokens          mTokens;
  nsIURI*            mSelfURI;
  nsCSPPolicy*       mPolicy;
  nsCSPContext*      mCSPContext; // used for console logging
  bool               mDeliveredViaMetaTag;
};
#endif /* nsCSPParser_h___ */