#632: M241788 M1271955 M1249352(p1) + additional local optimizations

2025-04-12 16:37:15 +00:00 · 2020-11-22 17:11:45 -08:00 · 2020-11-22 17:11:45 -08:00 · 438bdb7265
commit 438bdb7265
parent 82cb3b59e5
9 changed files with 166 additions and 192 deletions
--- a/netwerk/base/nsIOService.cpp
+++ b/netwerk/base/nsIOService.cpp
@ -569,7 +569,7 @@ nsIOService::GetProtocolHandler(const char* scheme, nsIProtocolHandler* *result)
 NS_IMETHODIMP
 nsIOService::ExtractScheme(const nsACString &inURI, nsACString &scheme)
 {
-    return net_ExtractURLScheme(inURI, nullptr, nullptr, &scheme);
+    return net_ExtractURLScheme(inURI, scheme);
 }

 NS_IMETHODIMP 
--- a/netwerk/base/nsSimpleURI.cpp
+++ b/netwerk/base/nsSimpleURI.cpp
@ -193,17 +193,11 @@ nsSimpleURI::SetSpec(const nsACString &aSpec)
 {
    NS_ENSURE_STATE(mMutable);
    
-    const nsAFlatCString& flat = PromiseFlatCString(aSpec);
-    const char* specPtr = flat.get();
-
    // filter out unexpected chars "\r\n\t" if necessary
    nsAutoCString filteredSpec;
-    int32_t specLen;
-    if (net_FilterURIString(specPtr, filteredSpec)) {
-        specPtr = filteredSpec.get();
-        specLen = filteredSpec.Length();
-    } else
-        specLen = flat.Length();
+    net_FilterURIString(aSpec, filteredSpec);
+    const char* specPtr = filteredSpec.get();
+    int32_t specLen = filteredSpec.Length();

    // nsSimpleURI currently restricts the charset to US-ASCII
    nsAutoCString spec;
--- a/netwerk/base/nsStandardURL.cpp
+++ b/netwerk/base/nsStandardURL.cpp
@ -1286,22 +1286,26 @@ nsStandardURL::SetSpec(const nsACString &input)
 {
    ENSURE_MUTABLE();

+#if DEBUG
+    // Don't pay the flat tax in optimized builds.
    const nsPromiseFlatCString &flat = PromiseFlatCString(input);
-    const char *spec = flat.get();
-    int32_t specLength = flat.Length();
-
-    LOG(("nsStandardURL::SetSpec [spec=%s]\n", spec));
-
-    if (!spec || !*spec)
-        return NS_ERROR_MALFORMED_URI;
+    LOG(("nsStandardURL::SetSpec [spec=%s]\n", flat.get()));
+#endif

    if (input.Length() > (uint32_t) net_GetURLMaxLength()) {
        return NS_ERROR_MALFORMED_URI;
    }

-    // NUL characters aren't allowed
-    // \r\n\t are stripped out instead of returning error(see below)
-    if (input.Contains('\0')) {
+    // filter out unexpected chars "\r\n\t" if necessary
+    nsAutoCString filteredURI;
+    net_FilterURIString(input, filteredURI);
+
+    if (filteredURI.Length() == 0) {
+        return NS_ERROR_MALFORMED_URI;
+    }
+
+    // NUL characters aren't allowed in the filtered URI.
+    if (filteredURI.Contains('\0')) {
        return NS_ERROR_MALFORMED_URI;
    }

@ -1310,12 +1314,8 @@ nsStandardURL::SetSpec(const nsACString &input)
    prevURL.CopyMembers(this, eHonorRef);
    Clear();

-    // filter out unexpected chars "\r\n\t" if necessary
-    nsAutoCString buf1;
-    if (net_FilterURIString(spec, buf1)) {
-        spec = buf1.get();
-        specLength = buf1.Length();
-    }
+    const char *spec = filteredURI.get();
+    int32_t specLength = filteredURI.Length();

    // parse the given URL...
    nsresult rv = ParseURL(spec, specLength);
@ -2069,18 +2069,12 @@ nsresult nsStandardURL::CopyMembers(nsStandardURL * source,
 NS_IMETHODIMP
 nsStandardURL::Resolve(const nsACString &in, nsACString &out)
 {
-    const nsPromiseFlatCString &flat = PromiseFlatCString(in);
-    const char *relpath = flat.get();
-
    // filter out unexpected chars "\r\n\t" if necessary
    nsAutoCString buf;
-    int32_t relpathLen;
-    if (net_FilterURIString(relpath, buf)) {
-        relpath = buf.get();
-        relpathLen = buf.Length();
-    } else
-        relpathLen = flat.Length();
-    
+    net_FilterURIString(in, buf);
+    const char *relpath = buf.get();
+    int32_t relpathLen = buf.Length();
+
    char *result = nullptr;

    LOG(("nsStandardURL::Resolve [this=%p spec=%s relpath=%s]\n",
@ -2935,20 +2929,8 @@ nsStandardURL::Init(uint32_t urlType,
        mOriginCharset = charset;
    }

-    if (baseURI) {
-        uint32_t start, end;
-        // pull out the scheme and where it ends
-        nsresult rv = net_ExtractURLScheme(spec, &start, &end, nullptr);
-        if (NS_SUCCEEDED(rv) && spec.Length() > end+2) {
-            nsACString::const_iterator slash;
-            spec.BeginReading(slash);
-            slash.advance(end+1);
-            // then check if // follows
-            // if it follows, aSpec is really absolute ... 
-            // ignore aBaseURI in this case
-            if (*slash == '/' && *(++slash) == '/')
-                baseURI = nullptr;
-        }
+    if (baseURI && net_IsAbsoluteURL(spec)) {
+        baseURI = nullptr;
    }

    if (!baseURI)
--- a/netwerk/base/nsURLHelper.cpp
+++ b/netwerk/base/nsURLHelper.cpp
@ -14,6 +14,7 @@
 #include "nsNetCID.h"
 #include "mozilla/Preferences.h"
 #include "prnetdb.h"
+#include "mozilla/Tokenizer.h"

 #include "mozilla-config.h"
 #include "plvmx.h"
@ -183,11 +184,11 @@ net_ParseFileURL(const nsACString &inURL,
    const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL);
    const char *url = flatURL.get();
    
-    uint32_t schemeBeg, schemeEnd;
-    rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nullptr);
+    nsAutoCString scheme;
+    rv = net_ExtractURLScheme(flatURL, scheme);
    if (NS_FAILED(rv)) return rv;

-    if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) {
+    if (!scheme.EqualsLiteral("file")) {
        NS_ERROR("must be a file:// url");
        return NS_ERROR_UNEXPECTED;
    }
@ -486,57 +487,62 @@ net_ResolveRelativePath(const nsACString &relativePath,
 // scheme fu
 //----------------------------------------------------------------------------

+#if !defined(MOZILLA_XPCOMRT_API)
+static bool isAsciiAlpha(char c) {
+    return nsCRT::IsAsciiAlpha(c);
+}
+
+static bool
+net_IsValidSchemeChar(const char aChar)
+{
+    if (nsCRT::IsAsciiAlpha(aChar) || nsCRT::IsAsciiDigit(aChar) ||
+        aChar == '+' || aChar == '.' || aChar == '-') {
+        return true;
+    }
+    return false;
+}
+#endif
+
 /* Extract URI-Scheme if possible */
 nsresult
 net_ExtractURLScheme(const nsACString &inURI,
-                     uint32_t *startPos, 
-                     uint32_t *endPos,
-                     nsACString *scheme)
+                     nsACString& scheme)
 {
-    // search for something up to a colon, and call it the scheme
-    const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI);
-    const char* uri_start = flatURI.get();
-    const char* uri = uri_start;
+#if defined(MOZILLA_XPCOMRT_API)
+    NS_WARNING("net_ExtractURLScheme not implemented");
+    return NS_ERROR_NOT_IMPLEMENTED;
+#else
+    nsACString::const_iterator start, end;
+    inURI.BeginReading(start);
+    inURI.EndReading(end);

-    if (!uri)
-        return NS_ERROR_MALFORMED_URI;
-
-    // skip leading white space
-    while (nsCRT::IsAsciiSpace(*uri))
-        uri++;
-
-    uint32_t start = uri - uri_start;
-    if (startPos) {
-        *startPos = start;
-    }
-
-    uint32_t length = 0;
-    char c;
-    while ((c = *uri++) != '\0') {
-        // First char must be Alpha
-        if (length == 0 && nsCRT::IsAsciiAlpha(c)) {
-            length++;
-        } 
-        // Next chars can be alpha + digit + some special chars
-        else if (length > 0 && (nsCRT::IsAsciiAlpha(c) || 
-                 nsCRT::IsAsciiDigit(c) || c == '+' || 
-                 c == '.' || c == '-')) {
-            length++;
-        }
-        // stop if colon reached but not as first char
-        else if (c == ':' && length > 0) {
-            if (endPos) {
-                *endPos = start + length;
-            }
-
-            if (scheme)
-                scheme->Assign(Substring(inURI, start, length));
-            return NS_OK;
-        }
-        else 
+    // Strip C0 and space from begining
+    while (start != end) {
+        if ((uint8_t) *start > 0x20) {
            break;
+        }
+        start++;
    }
-    return NS_ERROR_MALFORMED_URI;
+
+    Tokenizer p(Substring(start, end), "\r\n\t");
+    p.Record();
+    if (!p.CheckChar(isAsciiAlpha)) {
+        // First char must be alpha
+        return NS_ERROR_MALFORMED_URI;
+    }
+
+    while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
+        // Skip valid scheme characters or \r\n\t
+    }
+
+    if (!p.CheckChar(':')) {
+        return NS_ERROR_MALFORMED_URI;
+    }
+
+    p.Claim(scheme);
+    scheme.StripChars("\r\n\t");
+    return NS_OK;
+#endif
 }

 bool
@ -560,86 +566,79 @@ net_IsValidScheme(const char *scheme, uint32_t schemeLen)
 }

 bool
-net_FilterURIString(const char *str, nsACString& result)
+net_IsAbsoluteURL(const nsACString& uri)
+{
+#if !defined(MOZILLA_XPCOMRT_API)
+    nsACString::const_iterator start, end;
+    uri.BeginReading(start);
+    uri.EndReading(end);
+
+    // Strip C0 and space from begining
+    while (start != end) {
+        if ((uint8_t) *start > 0x20) {
+            break;
+        }
+        start++;
+    }
+
+    Tokenizer p(Substring(start, end), "\r\n\t");
+
+    // First char must be alpha
+    if (!p.CheckChar(isAsciiAlpha)) {
+        return false;
+    }
+
+    while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
+        // Skip valid scheme characters or \r\n\t
+    }
+    if (!p.CheckChar(':')) {
+        return false;
+    }
+    p.SkipWhites();
+
+    if (!p.CheckChar('/')) {
+        return false;
+    }
+    p.SkipWhites();
+
+    if (p.CheckChar('/')) {
+        // aSpec is really absolute. Ignore aBaseURI in this case
+        return true;
+    }
+#endif
+    return false;
+}
+
+void
+net_FilterURIString(const nsACString& input, nsACString& result)
 {
-    NS_PRECONDITION(str, "Must have a non-null string!");
-    bool writing = false;
    result.Truncate();
-    const char *p = str;

-    // Remove leading spaces, tabs, CR, LF if any.
-    while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
-        writing = true;
-        str = p + 1;
-        p++;
+    nsACString::const_iterator start, end;
+    input.BeginReading(start);
+    input.EndReading(end);
+
+    // Strip C0 and space from begining
+    while (start != end) {
+        if ((uint8_t) *start > 0x20) {
+            break;
+        }
+        start++;
    }

-    // Don't strip from the scheme, because other code assumes everything
-    // up to the ':' is the scheme, and it's bad not to have it match.
-    // If there's no ':', strip.
-    bool found_colon = false;
-    const char *first = nullptr;
-    while (*p) {
-        switch (*p) {
-            case '\t': 
-            case '\r': 
-            case '\n':
-                if (found_colon) {
-                    writing = true;
-                    // append chars up to but not including *p
-                    if (p > str)
-                        result.Append(str, p - str);
-                    str = p + 1;
-                } else {
-                    // remember where the first \t\r\n was in case we find no scheme
-                    if (!first)
-                        first = p;
-                }
-                break;
-
-            case ':':
-                found_colon = true;
-                break;
-
-            case '/':
-            case '@':
-                if (!found_colon) {
-                    // colon also has to precede / or @ to be a scheme
-                    found_colon = true; // not really, but means ok to strip
-                    if (first) {
-                        // go back and replace
-                        p = first;
-                        continue; // process *p again
-                    }
-                }
-                break;
-
-            default:
-                break;
-        }
-        p++;
-
-        // At end, if there was no scheme, and we hit a control char, fix
-        // it up now.
-        if (!*p && first != nullptr && !found_colon) {
-            // TRICKY - to avoid duplicating code, we reset the loop back
-            // to the point we found something to do
-            p = first;
-            // This also stops us from looping after we finish
-            found_colon = true; // so we'll replace \t\r\n
+    MOZ_ASSERT(!*end, "input should null terminated");
+    // Strip C0 and space from end
+    while (end != start) {
+        end--;
+        if ((uint8_t) *end > 0x20) {
+            end++;
+            break;
        }
    }

-    // Remove trailing spaces if any
-    while (((p-1) >= str) && (*(p-1) == ' ')) {
-        writing = true;
-        p--;
-    }
-
-    if (writing && p > str)
-        result.Append(str, p - str);
-
-    return writing;
+    nsAutoCString temp(Substring(start, end));
+    temp.StripChars("\r\n\t");
+    result.Assign(temp);
 }

 #if defined(XP_WIN)
--- a/netwerk/base/nsURLHelper.h
+++ b/netwerk/base/nsURLHelper.h
@ -79,18 +79,22 @@ nsresult net_ResolveRelativePath(const nsACString &relativePath,
                                             const nsACString &basePath,
                                             nsACString &result);

+/**
+ * Check if a URL is absolute
+ *
+ * @param inURL     URL spec
+ * @return true if the given spec represents an absolute URL
+ */
+bool net_IsAbsoluteURL(const nsACString& inURL);
+
 /**
 * Extract URI-Scheme if possible
 *
 * @param inURI     URI spec
- * @param startPos  start of scheme (may be null)
- * @param endPos    end of scheme; index of colon (may be null)
 * @param scheme    scheme copied to this buffer on return (may be null)
 */
 nsresult net_ExtractURLScheme(const nsACString &inURI,
-                                          uint32_t *startPos, 
-                                          uint32_t *endPos,
-                                          nsACString *scheme = nullptr);
+                              nsACString &scheme);

 /* check that the given scheme conforms to RFC 2396 */
 bool net_IsValidScheme(const char *scheme, uint32_t schemeLen);
@ -101,22 +105,15 @@ inline bool net_IsValidScheme(const nsAFlatCString &scheme)
 }

 /**
- * Filter out whitespace from a URI string.  The input is the |str|
- * pointer. |result| is written to if and only if there is whitespace that has
- * to be filtered out.  The return value is true if and only if |result| is
- * written to.
+ * This function strips out all C0 controls and space at the beginning and end
+ * of the URL and filters out \r, \n, \t from the middle of the URL.  This makes
+ * it safe to call on things like javascript: urls or data: urls, where we may
+ * in fact run into whitespace that is not properly encoded.
 *
- * This function strips out all whitespace at the beginning and end of the URL
- * and strips out \r, \n, \t from the middle of the URL.  This makes it safe to
- * call on things like javascript: urls or data: urls, where we may in fact run
- * into whitespace that is not properly encoded.  Note that stripping does not
- * occur in the scheme portion itself.
- *
- * @param str the pointer to the string to filter.  Must be non-null.
+ * @param input the URL spec we want to filter
 * @param result the out param to write to if filtering happens
- * @return whether result was written to
 */
-bool net_FilterURIString(const char *str, nsACString& result);
+void net_FilterURIString(const nsACString& input, nsACString& result);

 #if defined(XP_WIN)
 /**
--- a/netwerk/protocol/http/Http2Stream.cpp
+++ b/netwerk/protocol/http/Http2Stream.cpp
@ -354,7 +354,7 @@ nsresult
 Http2Stream::MakeOriginURL(const nsACString &origin, RefPtr<nsStandardURL> &url)
 {
  nsAutoCString scheme;
-  nsresult rv = net_ExtractURLScheme(origin, nullptr, nullptr, &scheme);
+  nsresult rv = net_ExtractURLScheme(origin, scheme);
  NS_ENSURE_SUCCESS(rv, rv);
  return MakeOriginURL(scheme, origin, url);
 }
--- a/netwerk/protocol/res/SubstitutingProtocolHandler.cpp
+++ b/netwerk/protocol/res/SubstitutingProtocolHandler.cpp
@ -54,7 +54,7 @@ SubstitutingURL::EnsureFile()
    return rv;

  nsAutoCString scheme;
-  rv = net_ExtractURLScheme(spec, nullptr, nullptr, &scheme);
+  rv = net_ExtractURLScheme(spec, scheme);
  if (NS_FAILED(rv))
    return rv;

--- a/netwerk/streamconv/converters/nsIndexedToHTML.cpp
+++ b/netwerk/streamconv/converters/nsIndexedToHTML.cpp
@ -759,8 +759,10 @@ nsIndexedToHTML::OnIndexAvailable(nsIRequest *aRequest,
    // for some protocols, we expect the location to be absolute.
    // if so, and if the location indeed appears to be a valid URI, then go
    // ahead and treat it like one.
+
+    nsAutoCString scheme;
    if (mExpectAbsLoc &&
-        NS_SUCCEEDED(net_ExtractURLScheme(loc, nullptr, nullptr, nullptr))) {
+        NS_SUCCEEDED(net_ExtractURLScheme(loc, scheme))) {
        // escape as absolute 
        escFlags = esc_Forced | esc_AlwaysCopy | esc_Minimal;
    }
--- a/xpcom/ds/Tokenizer.cpp
+++ b/xpcom/ds/Tokenizer.cpp
@ -263,12 +263,12 @@ Tokenizer::Parse(Token& aToken) const
    state = PARSE_WORD;
  } else if (IsNumber(*next)) {
    state = PARSE_INTEGER;
+  } else if (VMX_STRCHR(mWhitespaces, *next)) { // not UTF-8 friendly?
+    state = PARSE_WS;
  } else if (*next == '\r') {
    state = PARSE_CRLF;
  } else if (*next == '\n') {
    state = PARSE_LF;
-  } else if (VMX_STRCHR(mWhitespaces, *next)) { // not UTF-8 friendly?
-    state = PARSE_WS;
  } else {
    state = PARSE_CHAR;
  }