diff --git a/netwerk/base/nsIOService.cpp b/netwerk/base/nsIOService.cpp index 1de6b14f8..529365790 100644 --- a/netwerk/base/nsIOService.cpp +++ b/netwerk/base/nsIOService.cpp @@ -569,7 +569,7 @@ nsIOService::GetProtocolHandler(const char* scheme, nsIProtocolHandler* *result) NS_IMETHODIMP nsIOService::ExtractScheme(const nsACString &inURI, nsACString &scheme) { - return net_ExtractURLScheme(inURI, nullptr, nullptr, &scheme); + return net_ExtractURLScheme(inURI, scheme); } NS_IMETHODIMP diff --git a/netwerk/base/nsSimpleURI.cpp b/netwerk/base/nsSimpleURI.cpp index e716e4ac2..968f3a948 100644 --- a/netwerk/base/nsSimpleURI.cpp +++ b/netwerk/base/nsSimpleURI.cpp @@ -193,17 +193,11 @@ nsSimpleURI::SetSpec(const nsACString &aSpec) { NS_ENSURE_STATE(mMutable); - const nsAFlatCString& flat = PromiseFlatCString(aSpec); - const char* specPtr = flat.get(); - // filter out unexpected chars "\r\n\t" if necessary nsAutoCString filteredSpec; - int32_t specLen; - if (net_FilterURIString(specPtr, filteredSpec)) { - specPtr = filteredSpec.get(); - specLen = filteredSpec.Length(); - } else - specLen = flat.Length(); + net_FilterURIString(aSpec, filteredSpec); + const char* specPtr = filteredSpec.get(); + int32_t specLen = filteredSpec.Length(); // nsSimpleURI currently restricts the charset to US-ASCII nsAutoCString spec; diff --git a/netwerk/base/nsStandardURL.cpp b/netwerk/base/nsStandardURL.cpp index 8910e67d1..331fd78db 100644 --- a/netwerk/base/nsStandardURL.cpp +++ b/netwerk/base/nsStandardURL.cpp @@ -1286,22 +1286,26 @@ nsStandardURL::SetSpec(const nsACString &input) { ENSURE_MUTABLE(); +#if DEBUG + // Don't pay the flat tax in optimized builds. 
const nsPromiseFlatCString &flat = PromiseFlatCString(input); - const char *spec = flat.get(); - int32_t specLength = flat.Length(); - - LOG(("nsStandardURL::SetSpec [spec=%s]\n", spec)); - - if (!spec || !*spec) - return NS_ERROR_MALFORMED_URI; + LOG(("nsStandardURL::SetSpec [spec=%s]\n", flat.get())); +#endif if (input.Length() > (uint32_t) net_GetURLMaxLength()) { return NS_ERROR_MALFORMED_URI; } - // NUL characters aren't allowed - // \r\n\t are stripped out instead of returning error(see below) - if (input.Contains('\0')) { + // filter out unexpected chars "\r\n\t" if necessary + nsAutoCString filteredURI; + net_FilterURIString(input, filteredURI); + + if (filteredURI.Length() == 0) { + return NS_ERROR_MALFORMED_URI; + } + + // NUL characters aren't allowed in the filtered URI. + if (filteredURI.Contains('\0')) { return NS_ERROR_MALFORMED_URI; } @@ -1310,12 +1314,8 @@ nsStandardURL::SetSpec(const nsACString &input) prevURL.CopyMembers(this, eHonorRef); Clear(); - // filter out unexpected chars "\r\n\t" if necessary - nsAutoCString buf1; - if (net_FilterURIString(spec, buf1)) { - spec = buf1.get(); - specLength = buf1.Length(); - } + const char *spec = filteredURI.get(); + int32_t specLength = filteredURI.Length(); // parse the given URL... 
nsresult rv = ParseURL(spec, specLength); @@ -2069,18 +2069,12 @@ nsresult nsStandardURL::CopyMembers(nsStandardURL * source, NS_IMETHODIMP nsStandardURL::Resolve(const nsACString &in, nsACString &out) { - const nsPromiseFlatCString &flat = PromiseFlatCString(in); - const char *relpath = flat.get(); - // filter out unexpected chars "\r\n\t" if necessary nsAutoCString buf; - int32_t relpathLen; - if (net_FilterURIString(relpath, buf)) { - relpath = buf.get(); - relpathLen = buf.Length(); - } else - relpathLen = flat.Length(); - + net_FilterURIString(in, buf); + const char *relpath = buf.get(); + int32_t relpathLen = buf.Length(); + char *result = nullptr; LOG(("nsStandardURL::Resolve [this=%p spec=%s relpath=%s]\n", @@ -2935,20 +2929,8 @@ nsStandardURL::Init(uint32_t urlType, mOriginCharset = charset; } - if (baseURI) { - uint32_t start, end; - // pull out the scheme and where it ends - nsresult rv = net_ExtractURLScheme(spec, &start, &end, nullptr); - if (NS_SUCCEEDED(rv) && spec.Length() > end+2) { - nsACString::const_iterator slash; - spec.BeginReading(slash); - slash.advance(end+1); - // then check if // follows - // if it follows, aSpec is really absolute ... 
- // ignore aBaseURI in this case - if (*slash == '/' && *(++slash) == '/') - baseURI = nullptr; - } + if (baseURI && net_IsAbsoluteURL(spec)) { + baseURI = nullptr; } if (!baseURI) diff --git a/netwerk/base/nsURLHelper.cpp b/netwerk/base/nsURLHelper.cpp index 9b6393316..a3d747262 100644 --- a/netwerk/base/nsURLHelper.cpp +++ b/netwerk/base/nsURLHelper.cpp @@ -14,6 +14,7 @@ #include "nsNetCID.h" #include "mozilla/Preferences.h" #include "prnetdb.h" +#include "mozilla/Tokenizer.h" #include "mozilla-config.h" #include "plvmx.h" @@ -183,11 +184,11 @@ net_ParseFileURL(const nsACString &inURL, const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL); const char *url = flatURL.get(); - uint32_t schemeBeg, schemeEnd; - rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nullptr); + nsAutoCString scheme; + rv = net_ExtractURLScheme(flatURL, scheme); if (NS_FAILED(rv)) return rv; - if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) { + if (!scheme.EqualsLiteral("file")) { NS_ERROR("must be a file:// url"); return NS_ERROR_UNEXPECTED; } @@ -486,57 +487,62 @@ net_ResolveRelativePath(const nsACString &relativePath, // scheme fu //---------------------------------------------------------------------------- +#if !defined(MOZILLA_XPCOMRT_API) +static bool isAsciiAlpha(char c) { + return nsCRT::IsAsciiAlpha(c); +} + +static bool +net_IsValidSchemeChar(const char aChar) +{ + if (nsCRT::IsAsciiAlpha(aChar) || nsCRT::IsAsciiDigit(aChar) || + aChar == '+' || aChar == '.' 
|| aChar == '-') { + return true; + } + return false; +} +#endif + /* Extract URI-Scheme if possible */ nsresult net_ExtractURLScheme(const nsACString &inURI, - uint32_t *startPos, - uint32_t *endPos, - nsACString *scheme) + nsACString& scheme) { - // search for something up to a colon, and call it the scheme - const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI); - const char* uri_start = flatURI.get(); - const char* uri = uri_start; +#if defined(MOZILLA_XPCOMRT_API) + NS_WARNING("net_ExtractURLScheme not implemented"); + return NS_ERROR_NOT_IMPLEMENTED; +#else + nsACString::const_iterator start, end; + inURI.BeginReading(start); + inURI.EndReading(end); - if (!uri) - return NS_ERROR_MALFORMED_URI; - - // skip leading white space - while (nsCRT::IsAsciiSpace(*uri)) - uri++; - - uint32_t start = uri - uri_start; - if (startPos) { - *startPos = start; - } - - uint32_t length = 0; - char c; - while ((c = *uri++) != '\0') { - // First char must be Alpha - if (length == 0 && nsCRT::IsAsciiAlpha(c)) { - length++; - } - // Next chars can be alpha + digit + some special chars - else if (length > 0 && (nsCRT::IsAsciiAlpha(c) || - nsCRT::IsAsciiDigit(c) || c == '+' || - c == '.' 
|| c == '-')) { - length++; - } - // stop if colon reached but not as first char - else if (c == ':' && length > 0) { - if (endPos) { - *endPos = start + length; - } - - if (scheme) - scheme->Assign(Substring(inURI, start, length)); - return NS_OK; - } - else + // Strip C0 and space from beginning + while (start != end) { + if ((uint8_t) *start > 0x20) { break; + } + start++; } - return NS_ERROR_MALFORMED_URI; + + Tokenizer p(Substring(start, end), "\r\n\t"); + p.Record(); + if (!p.CheckChar(isAsciiAlpha)) { + // First char must be alpha + return NS_ERROR_MALFORMED_URI; + } + + while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) { + // Skip valid scheme characters or \r\n\t + } + + if (!p.CheckChar(':')) { + return NS_ERROR_MALFORMED_URI; + } + + p.Claim(scheme); + scheme.StripChars("\r\n\t"); + return NS_OK; +#endif } bool @@ -560,86 +566,79 @@ net_IsValidScheme(const char *scheme, uint32_t schemeLen) } bool -net_FilterURIString(const char *str, nsACString& result) +net_IsAbsoluteURL(const nsACString& uri) +{ +#if !defined(MOZILLA_XPCOMRT_API) + nsACString::const_iterator start, end; + uri.BeginReading(start); + uri.EndReading(end); + + // Strip C0 and space from beginning + while (start != end) { + if ((uint8_t) *start > 0x20) { + break; + } + start++; + } + + Tokenizer p(Substring(start, end), "\r\n\t"); + + // First char must be alpha + if (!p.CheckChar(isAsciiAlpha)) { + return false; + } + + while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) { + // Skip valid scheme characters or \r\n\t + } + if (!p.CheckChar(':')) { + return false; + } + p.SkipWhites(); + + if (!p.CheckChar('/')) { + return false; + } + p.SkipWhites(); + + if (p.CheckChar('/')) { + // aSpec is really absolute.
Ignore aBaseURI in this case + return true; + } +#endif + return false; +} + +void +net_FilterURIString(const nsACString& input, nsACString& result) { - NS_PRECONDITION(str, "Must have a non-null string!"); - bool writing = false; result.Truncate(); - const char *p = str; - // Remove leading spaces, tabs, CR, LF if any. - while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') { - writing = true; - str = p + 1; - p++; + nsACString::const_iterator start, end; + input.BeginReading(start); + input.EndReading(end); + + // Strip C0 and space from beginning + while (start != end) { + if ((uint8_t) *start > 0x20) { + break; + } + start++; } - // Don't strip from the scheme, because other code assumes everything - // up to the ':' is the scheme, and it's bad not to have it match. - // If there's no ':', strip. - bool found_colon = false; - const char *first = nullptr; - while (*p) { - switch (*p) { - case '\t': - case '\r': - case '\n': - if (found_colon) { - writing = true; - // append chars up to but not including *p - if (p > str) - result.Append(str, p - str); - str = p + 1; - } else { - // remember where the first \t\r\n was in case we find no scheme - if (!first) - first = p; - } - break; - - case ':': - found_colon = true; - break; - - case '/': - case '@': - if (!found_colon) { - // colon also has to precede / or @ to be a scheme - found_colon = true; // not really, but means ok to strip - if (first) { - // go back and replace - p = first; - continue; // process *p again - } - } - break; - - default: - break; - } - p++; - - // At end, if there was no scheme, and we hit a control char, fix - // it up now.
- if (!*p && first != nullptr && !found_colon) { - // TRICKY - to avoid duplicating code, we reset the loop back - // to the point we found something to do - p = first; - // This also stops us from looping after we finish - found_colon = true; // so we'll replace \t\r\n + // Callers no longer flatten |input|, so it may lack a terminating NUL; only positions before |end| are read. + // Strip C0 and space from end + while (end != start) { + end--; + if ((uint8_t) *end > 0x20) { + end++; + break; } } - // Remove trailing spaces if any - while (((p-1) >= str) && (*(p-1) == ' ')) { - writing = true; - p--; - } - - if (writing && p > str) - result.Append(str, p - str); - - return writing; + nsAutoCString temp(Substring(start, end)); + temp.StripChars("\r\n\t"); + result.Assign(temp); } #if defined(XP_WIN) diff --git a/netwerk/base/nsURLHelper.h b/netwerk/base/nsURLHelper.h index bc4dacf36..f30814c25 100644 --- a/netwerk/base/nsURLHelper.h +++ b/netwerk/base/nsURLHelper.h @@ -79,18 +79,22 @@ nsresult net_ResolveRelativePath(const nsACString &relativePath, const nsACString &basePath, nsACString &result); +/** + * Check if a URL is absolute + * + * @param inURL URL spec + * @return true if the given spec represents an absolute URL + */ +bool net_IsAbsoluteURL(const nsACString& inURL); + /** * Extract URI-Scheme if possible * * @param inURI URI spec - * @param startPos start of scheme (may be null) - * @param endPos end of scheme; index of colon (may be null) - * @param scheme scheme copied to this buffer on return (may be null) + * @param scheme scheme copied to this buffer on return */ nsresult net_ExtractURLScheme(const nsACString &inURI, - uint32_t *startPos, - uint32_t *endPos, - nsACString *scheme = nullptr); + nsACString &scheme); /* check that the given scheme conforms to RFC 2396 */ bool net_IsValidScheme(const char *scheme, uint32_t schemeLen); @@ -101,22 +105,15 @@ inline bool net_IsValidScheme(const nsAFlatCString &scheme) } /** - * Filter out whitespace from a URI string. The input is the |str| - * pointer.
|result| is written to if and only if there is whitespace that has - * to be filtered out. The return value is true if and only if |result| is - * written to. + * This function strips out all C0 controls and space at the beginning and end + * of the URL and filters out \r, \n, \t from the middle of the URL. This makes + * it safe to call on things like javascript: urls or data: urls, where we may + * in fact run into whitespace that is not properly encoded. * - * This function strips out all whitespace at the beginning and end of the URL - * and strips out \r, \n, \t from the middle of the URL. This makes it safe to - * call on things like javascript: urls or data: urls, where we may in fact run - * into whitespace that is not properly encoded. Note that stripping does not - * occur in the scheme portion itself. - * - * @param str the pointer to the string to filter. Must be non-null. + * @param input the URL spec we want to filter * @param result the out param to write to if filtering happens - * @return whether result was written to */ -bool net_FilterURIString(const char *str, nsACString& result); +void net_FilterURIString(const nsACString& input, nsACString& result); #if defined(XP_WIN) /** diff --git a/netwerk/protocol/http/Http2Stream.cpp b/netwerk/protocol/http/Http2Stream.cpp index fb99bd277..99d7fe5df 100644 --- a/netwerk/protocol/http/Http2Stream.cpp +++ b/netwerk/protocol/http/Http2Stream.cpp @@ -354,7 +354,7 @@ nsresult Http2Stream::MakeOriginURL(const nsACString &origin, RefPtr &url) { nsAutoCString scheme; - nsresult rv = net_ExtractURLScheme(origin, nullptr, nullptr, &scheme); + nsresult rv = net_ExtractURLScheme(origin, scheme); NS_ENSURE_SUCCESS(rv, rv); return MakeOriginURL(scheme, origin, url); } diff --git a/netwerk/protocol/res/SubstitutingProtocolHandler.cpp b/netwerk/protocol/res/SubstitutingProtocolHandler.cpp index 181aabd72..eb651058e 100644 --- a/netwerk/protocol/res/SubstitutingProtocolHandler.cpp +++ 
b/netwerk/protocol/res/SubstitutingProtocolHandler.cpp @@ -54,7 +54,7 @@ SubstitutingURL::EnsureFile() return rv; nsAutoCString scheme; - rv = net_ExtractURLScheme(spec, nullptr, nullptr, &scheme); + rv = net_ExtractURLScheme(spec, scheme); if (NS_FAILED(rv)) return rv; diff --git a/netwerk/streamconv/converters/nsIndexedToHTML.cpp b/netwerk/streamconv/converters/nsIndexedToHTML.cpp index 92fee9537..44159ac68 100644 --- a/netwerk/streamconv/converters/nsIndexedToHTML.cpp +++ b/netwerk/streamconv/converters/nsIndexedToHTML.cpp @@ -759,8 +759,10 @@ nsIndexedToHTML::OnIndexAvailable(nsIRequest *aRequest, // for some protocols, we expect the location to be absolute. // if so, and if the location indeed appears to be a valid URI, then go // ahead and treat it like one. + + nsAutoCString scheme; if (mExpectAbsLoc && - NS_SUCCEEDED(net_ExtractURLScheme(loc, nullptr, nullptr, nullptr))) { + NS_SUCCEEDED(net_ExtractURLScheme(loc, scheme))) { // escape as absolute escFlags = esc_Forced | esc_AlwaysCopy | esc_Minimal; } diff --git a/xpcom/ds/Tokenizer.cpp b/xpcom/ds/Tokenizer.cpp index 8d23a4b5b..3aca2583c 100644 --- a/xpcom/ds/Tokenizer.cpp +++ b/xpcom/ds/Tokenizer.cpp @@ -263,12 +263,12 @@ Tokenizer::Parse(Token& aToken) const state = PARSE_WORD; } else if (IsNumber(*next)) { state = PARSE_INTEGER; + } else if (VMX_STRCHR(mWhitespaces, *next)) { // not UTF-8 friendly? + state = PARSE_WS; } else if (*next == '\r') { state = PARSE_CRLF; } else if (*next == '\n') { state = PARSE_LF; - } else if (VMX_STRCHR(mWhitespaces, *next)) { // not UTF-8 friendly? - state = PARSE_WS; } else { state = PARSE_CHAR; }