#583: update Readability to latest tip; update glue to new API

2025-08-06 05:26:58 +00:00 · 2019-12-19 22:28:55 -08:00
parent cb5d8f86a7
commit 8bd4c09a76
9 changed files with 1143 additions and 1021 deletions
--- a/.eslintignore
+++ b/.eslintignore
@@ -166,6 +166,7 @@ toolkit/components/places/**
 # Uses preprocessing
 toolkit/content/contentAreaUtils.js
 toolkit/components/jsdownloads/src/DownloadIntegration.jsm
+toolkit/components/reader/Readerable.jsm
 toolkit/components/search/nsSearchService.js
 toolkit/components/url-classifier/**
 toolkit/components/urlformatter/nsURLFormatter.js
--- a/browser/base/content/tab-content.js
+++ b/browser/base/content/tab-content.js
@@ -24,6 +24,8 @@ XPCOMUtils.defineLazyModuleGetter(this, "AboutReader",
  "resource://gre/modules/AboutReader.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "ReaderMode",
  "resource://gre/modules/ReaderMode.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "Readerable",
+  "resource://gre/modules/Readerable.jsm");
 XPCOMUtils.defineLazyGetter(this, "SimpleServiceDiscovery", function() {
  let ssdp = Cu.import("resource://gre/modules/SimpleServiceDiscovery.jsm", {}).SimpleServiceDiscovery;
  // Register targets
@@ -344,7 +346,7 @@ var AboutReaderListener = {
   * painted is not going to work.
   */
  updateReaderButton: function(forceNonArticle) {
-    if (!ReaderMode.isEnabledForParseOnLoad || this.isAboutReader ||
+    if (!Readerable.isEnabledForParseOnLoad || this.isAboutReader ||
        !(content.document instanceof content.HTMLDocument) ||
        content.document.mozSyntheticDocument) {
      return;
@@ -385,7 +387,7 @@ var AboutReaderListener = {

    // Only send updates when there are articles; there's no point updating with
    // |false| all the time.
-    if (ReaderMode.isProbablyReaderable(content.document)) {
+    if (Readerable.isProbablyReaderable(content.document)) {
      sendAsyncMessage("Reader:UpdateReaderButton", { isArticle: true });
    } else if (forceNonArticle) {
      sendAsyncMessage("Reader:UpdateReaderButton", { isArticle: false });
--- a/toolkit/components/reader/JSDOMParser.js
+++ b/toolkit/components/reader/JSDOMParser.js
@@ -1,10 +1,4 @@
-/*
- * DO NOT MODIFY THIS FILE DIRECTLY!
- *
- * This is a shared library that is maintained in an external repo:
- * https://github.com/mozilla/readability
- */
-
+/*eslint-env es6:false*/
 /* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */
@@ -33,10 +27,6 @@
 */
 (function (global) {

-  function error(m) {
-    dump("JSDOMParser error: " + m + "\n");
-  }
-
  // XML only defines these and the numeric ones:

  var entityTable = {
@@ -463,16 +453,15 @@
            else
              this.children.push(newNode);
          }
-        } else {
+        } else if (oldNode.nodeType === Node.ELEMENT_NODE) {
          // new node is not an element node.
          // if the old one was, update its element siblings:
-          if (oldNode.nodeType === Node.ELEMENT_NODE) {
-            if (oldNode.previousElementSibling)
-              oldNode.previousElementSibling.nextElementSibling = oldNode.nextElementSibling;
-            if (oldNode.nextElementSibling)
-              oldNode.nextElementSibling.previousElementSibling = oldNode.previousElementSibling;
-            this.children.splice(this.children.indexOf(oldNode), 1);
-          }
+          if (oldNode.previousElementSibling)
+            oldNode.previousElementSibling.nextElementSibling = oldNode.nextElementSibling;
+          if (oldNode.nextElementSibling)
+            oldNode.nextElementSibling.previousElementSibling = oldNode.previousElementSibling;
+          this.children.splice(this.children.indexOf(oldNode), 1);
+
          // If the old node wasn't an element, neither the new nor the old node was an element,
          // and the children array and its members shouldn't need any updating.
        }
@@ -492,8 +481,8 @@
    __JSDOMParser__: true,
  };

-  for (var i in nodeTypes) {
-    Node[i] = Node.prototype[i] = nodeTypes[i];
+  for (var nodeType in nodeTypes) {
+    Node[nodeType] = Node.prototype[nodeType] = nodeTypes[nodeType];
  }

  var Attribute = function (name, value) {
@@ -507,17 +496,9 @@
    },
    setValue: function(newValue) {
      this._value = newValue;
-      delete this._decodedValue;
    },
-    setDecodedValue: function(newValue) {
-      this._value = encodeHTML(newValue);
-      this._decodedValue = newValue;
-    },
-    getDecodedValue: function() {
-      if (typeof this._decodedValue === "undefined") {
-        this._decodedValue = (this._value && decodeHTML(this._value)) || "";
-      }
-      return this._decodedValue;
+    getEncodedValue: function() {
+      return encodeHTML(this._value);
    },
  };

@@ -562,9 +543,10 @@
      this._textContent = newText;
      delete this._innerHTML;
    },
-  }
+  };

-  var Document = function () {
+  var Document = function (url) {
+    this.documentURI = url;
    this.styleSheets = [];
    this.childNodes = [];
    this.children = [];
@@ -604,9 +586,30 @@
      node.textContent = text;
      return node;
    },
+
+    get baseURI() {
+      if (!this.hasOwnProperty("_baseURI")) {
+        this._baseURI = this.documentURI;
+        var baseElements = this.getElementsByTagName("base");
+        var href = baseElements[0] && baseElements[0].getAttribute("href");
+        if (href) {
+          try {
+            this._baseURI = (new URL(href, this._baseURI)).href;
+          } catch (ex) {/* Just fall back to documentURI */}
+        }
+      }
+      return this._baseURI;
+    },
  };

  var Element = function (tag) {
+    // We use this to find the closing tag.
+    this._matchingTag = tag;
+    // We're explicitly a non-namespace-aware parser; we just pretend it's all HTML.
+    var lastColonIndex = tag.lastIndexOf(":");
+    if (lastColonIndex != -1) {
+      tag = tag.substring(lastColonIndex + 1);
+    }
    this.attributes = [];
    this.childNodes = [];
    this.children = [];
@@ -655,6 +658,14 @@
      this.setAttribute("src", str);
    },

+    get srcset() {
+      return this.getAttribute("srcset") || "";
+    },
+
+    set srcset(str) {
+      this.setAttribute("srcset", str);
+    },
+
    get nodeName() {
      return this.tagName;
    },
@@ -671,14 +682,14 @@
            for (var j = 0; j < child.attributes.length; j++) {
              var attr = child.attributes[j];
              // the attribute value will be HTML escaped.
-              var val = attr.value;
+              var val = attr.getEncodedValue();
              var quote = (val.indexOf('"') === -1 ? '"' : "'");
-              arr.push(" " + attr.name + '=' + quote + val + quote);
+              arr.push(" " + attr.name + "=" + quote + val + quote);
            }

-            if (child.localName in voidElems) {
+            if (child.localName in voidElems && !child.childNodes.length) {
              // if this is a self-closing element, end it here
-              arr.push(">");
+              arr.push("/>");
            } else {
              // otherwise, add its children
              arr.push(">");
@@ -702,12 +713,13 @@
    set innerHTML(html) {
      var parser = new JSDOMParser();
      var node = parser.parse(html);
-      for (var i = this.childNodes.length; --i >= 0;) {
+      var i;
+      for (i = this.childNodes.length; --i >= 0;) {
        this.childNodes[i].parentNode = null;
      }
      this.childNodes = node.childNodes;
      this.children = node.children;
-      for (var i = this.childNodes.length; --i >= 0;) {
+      for (i = this.childNodes.length; --i >= 0;) {
        this.childNodes[i].parentNode = this;
      }
    },
@@ -748,8 +760,9 @@
    getAttribute: function (name) {
      for (var i = this.attributes.length; --i >= 0;) {
        var attr = this.attributes[i];
-        if (attr.name === name)
-          return attr.getDecodedValue();
+        if (attr.name === name) {
+          return attr.value;
+        }
      }
      return undefined;
    },
@@ -758,11 +771,11 @@
      for (var i = this.attributes.length; --i >= 0;) {
        var attr = this.attributes[i];
        if (attr.name === name) {
-          attr.setDecodedValue(value);
+          attr.setValue(value);
          return;
        }
      }
-      this.attributes.push(new Attribute(name, encodeHTML(value)));
+      this.attributes.push(new Attribute(name, value));
    },

    removeAttribute: function (name) {
@@ -773,7 +786,13 @@
          break;
        }
      }
-    }
+    },
+
+    hasAttribute: function (name) {
+      return this.attributes.some(function (attr) {
+        return attr.name == name;
+      });
+    },
  };

  var Style = function (node) {
@@ -831,7 +850,7 @@
      Style.prototype.__defineSetter__(jsName, function (value) {
        this.setStyle(cssName, value);
      });
-    }) (styleMap[jsName]);
+    })(styleMap[jsName]);
  }

  var JSDOMParser = function () {
@@ -849,9 +868,16 @@
    // makeElementNode(), which saves us from having to allocate a new array
    // every time.
    this.retPair = [];
+
+    this.errorState = "";
  };

  JSDOMParser.prototype = {
+    error: function(m) {
+      dump("JSDOMParser error: " + m + "\n");
+      this.errorState += m + "\n";
+    },
+
    /**
     * Look at the next character without advancing the index.
     */
@@ -906,14 +932,14 @@
      // After a '=', we should see a '"' for the attribute value
      var c = this.nextChar();
      if (c !== '"' && c !== "'") {
-        error("Error reading attribute " + name + ", expecting '\"'");
+        this.error("Error reading attribute " + name + ", expecting '\"'");
        return;
      }

      // Read the attribute value (and consume the matching quote)
      var value = this.readString(c);

-      node.attributes.push(new Attribute(name, value));
+      node.attributes.push(new Attribute(name, decodeHTML(value)));

      return;
    },
@@ -938,7 +964,7 @@
        strBuf.push(c);
        c = this.nextChar();
      }
-      var tag = strBuf.join('');
+      var tag = strBuf.join("");

      if (!tag)
        return false;
@@ -949,7 +975,9 @@
      while (c !== "/" && c !== ">") {
        if (c === undefined)
          return false;
-        while (whitespace.indexOf(this.html[this.currentChar++]) != -1);
+        while (whitespace.indexOf(this.html[this.currentChar++]) != -1) {
+          // Advance cursor to first non-whitespace char.
+        }
        this.currentChar--;
        c = this.nextChar();
        if (c !== "/" && c !== ">") {
@@ -959,19 +987,19 @@
      }

      // If this is a self-closing tag, read '/>'
-      var closed = tag in voidElems;
+      var closed = false;
      if (c === "/") {
        closed = true;
        c = this.nextChar();
        if (c !== ">") {
-          error("expected '>' to close " + tag);
+          this.error("expected '>' to close " + tag);
          return false;
        }
      }

      retPair[0] = node;
      retPair[1] = closed;
-      return true
+      return true;
    },

    /**
@@ -1013,46 +1041,6 @@
      }
    },

-    readScript: function (node) {
-      while (this.currentChar < this.html.length) {
-        var c = this.nextChar();
-        var nextC = this.peekNext();
-        if (c === "<") {
-          if (nextC === "!" || nextC === "?") {
-            // We're still before the ! or ? that is starting this comment:
-            this.currentChar++;
-            node.appendChild(this.discardNextComment());
-            continue;
-          }
-          if (nextC === "/" && this.html.substr(this.currentChar, 8 /*"/script>".length */).toLowerCase() == "/script>") {
-            // Go back before the '<' so we find the end tag.
-            this.currentChar--;
-            // Done with this script tag, the caller will close:
-            return;
-          }
-        }
-        // Either c wasn't a '<' or it was but we couldn't find either a comment
-        // or a closing script tag, so we should just parse as text until the next one
-        // comes along:
-
-        var haveTextNode = node.lastChild && node.lastChild.nodeType === Node.TEXT_NODE;
-        var textNode = haveTextNode ? node.lastChild : new Text();
-        var n = this.html.indexOf("<", this.currentChar);
-        // Decrement this to include the current character *afterwards* so we don't get stuck
-        // looking for the same < all the time.
-        this.currentChar--;
-        if (n === -1) {
-          textNode.innerHTML += this.html.substring(this.currentChar, this.html.length);
-          this.currentChar = this.html.length;
-        } else {
-          textNode.innerHTML += this.html.substring(this.currentChar, n);
-          this.currentChar = n;
-        }
-        if (!haveTextNode)
-          node.appendChild(textNode);
-      }
-    },
-
    discardNextComment: function() {
      if (this.match("--")) {
        this.discardTo("-->");
@@ -1083,18 +1071,31 @@
        return null;

      // Read any text as Text node
+      var textNode;
      if (c !== "<") {
        --this.currentChar;
-        var node = new Text();
+        textNode = new Text();
        var n = this.html.indexOf("<", this.currentChar);
        if (n === -1) {
-          node.innerHTML = this.html.substring(this.currentChar, this.html.length);
+          textNode.innerHTML = this.html.substring(this.currentChar, this.html.length);
          this.currentChar = this.html.length;
        } else {
-          node.innerHTML = this.html.substring(this.currentChar, n);
+          textNode.innerHTML = this.html.substring(this.currentChar, n);
          this.currentChar = n;
        }
-        return node;
+        return textNode;
+      }
+
+      if (this.match("![CDATA[")) {
+        var endChar = this.html.indexOf("]]>", this.currentChar);
+        if (endChar === -1) {
+          this.error("unclosed CDATA section");
+          return null;
+        }
+        textNode = new Text();
+        textNode.textContent = this.html.substring(this.currentChar, endChar);
+        this.currentChar = endChar + ("]]>").length;
+        return textNode;
      }

      c = this.peekNext();
@@ -1127,14 +1128,10 @@

      // If this isn't a void Element, read its child nodes
      if (!closed) {
-        if (localName == "script") {
-          this.readScript(node);
-        } else {
-          this.readChildren(node);
-        }
-        var closingTag = "</" + localName + ">";
+        this.readChildren(node);
+        var closingTag = "</" + node._matchingTag + ">";
        if (!this.match(closingTag)) {
-          error("expected '" + closingTag + "'");
+          this.error("expected '" + closingTag + "' and got " + this.html.substr(this.currentChar, closingTag.length));
          return null;
        }
      }
@@ -1158,9 +1155,9 @@
    /**
     * Parses an HTML string and returns a JS implementation of the Document.
     */
-    parse: function (html) {
+    parse: function (html, url) {
      this.html = html;
-      var doc = this.doc = new Document();
+      var doc = this.doc = new Document(url);
      this.readChildren(doc);

      // If this is an HTML document, remove root-level children except for the
@@ -1188,4 +1185,4 @@
  // Attach JSDOMParser to the global scope
  global.JSDOMParser = JSDOMParser;

-}) (this);
+})(this);
--- a/toolkit/components/reader/Readability-readerable.js
+++ b/toolkit/components/reader/Readability-readerable.js
@@ -0,0 +1,98 @@
+/* eslint-env es6:false */
+/* globals exports */
+/*
+ * Copyright (c) 2010 Arc90 Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This code is heavily based on Arc90's readability.js (1.7.1) script
+ * available at: http://code.google.com/p/arc90labs-readability
+ */
+
+var REGEXPS = {
+  // NOTE: These two regular expressions are duplicated in
+  // Readability.js. Please keep both copies in sync.
+  unlikelyCandidates: /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,
+  okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i,
+};
+
+function isNodeVisible(node) {
+  // Have to null-check node.style to deal with SVG and MathML nodes.
+  return (!node.style || node.style.display != "none") && !node.hasAttribute("hidden")
+    && (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true");
+}
+
+/**
+ * Decides whether or not the document is reader-able without parsing the whole thing.
+ *
+ * @return boolean Whether or not we suspect Readability.parse() will succeed at returning an article object.
+ */
+function isProbablyReaderable(doc, isVisible) {
+  if (!isVisible) {
+    isVisible = isNodeVisible;
+  }
+
+  var nodes = doc.querySelectorAll("p, pre");
+
+  // Get <div> nodes which have <br> node(s) and append them into the `nodes` variable.
+  // Some articles' DOM structures might look like
+  // <div>
+  //   Sentences<br>
+  //   <br>
+  //   Sentences<br>
+  // </div>
+  var brNodes = doc.querySelectorAll("div > br");
+  if (brNodes.length) {
+    var set = new Set(nodes);
+    [].forEach.call(brNodes, function(node) {
+      set.add(node.parentNode);
+    });
+    nodes = Array.from(set);
+  }
+
+  var score = 0;
+  // This is a little cheeky, we use the accumulator 'score' to decide what to return from
+  // this callback:
+  return [].some.call(nodes, function(node) {
+    if (!isVisible(node))
+      return false;
+
+    var matchString = node.className + " " + node.id;
+    if (REGEXPS.unlikelyCandidates.test(matchString) &&
+        !REGEXPS.okMaybeItsACandidate.test(matchString)) {
+      return false;
+    }
+
+    if (node.matches("li p")) {
+      return false;
+    }
+
+    var textContentLength = node.textContent.trim().length;
+    if (textContentLength < 140) {
+      return false;
+    }
+
+    score += Math.sqrt(textContentLength - 140);
+
+    if (score > 20) {
+      return true;
+    }
+    return false;
+  });
+}
+
+if (typeof exports === "object") {
+  exports.isProbablyReaderable = isProbablyReaderable;
+}
--- a/toolkit/components/reader/Readability.js
+++ b/toolkit/components/reader/Readability.js
--- a/toolkit/components/reader/ReaderMode.jsm
+++ b/toolkit/components/reader/ReaderMode.jsm
@@ -27,14 +27,8 @@ XPCOMUtils.defineLazyModuleGetter(this, "CommonUtils", "resource://services-comm
 XPCOMUtils.defineLazyModuleGetter(this, "OS", "resource://gre/modules/osfile.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "ReaderWorker", "resource://gre/modules/reader/ReaderWorker.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "Task", "resource://gre/modules/Task.jsm");
-XPCOMUtils.defineLazyModuleGetter(this, "TelemetryStopwatch", "resource://gre/modules/TelemetryStopwatch.jsm");
-
-XPCOMUtils.defineLazyGetter(this, "Readability", function() {
-  let scope = {};
-  scope.dump = this.dump;
-  Services.scriptloader.loadSubScript("resource://gre/modules/reader/Readability.js", scope);
-  return scope["Readability"];
-});
+//XPCOMUtils.defineLazyModuleGetter(this, "TelemetryStopwatch", "resource://gre/modules/TelemetryStopwatch.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "Readerable", "resource://gre/modules/Readerable.jsm");

 this.ReaderMode = {
  // Version of the cache schema.
@@ -42,50 +36,6 @@ this.ReaderMode = {

  DEBUG: 0,

-  // Don't try to parse the page if it has too many elements (for memory and
-  // performance reasons)
-  get maxElemsToParse() {
-    delete this.parseNodeLimit;
-
-    Services.prefs.addObserver("reader.parse-node-limit", this, false);
-    return this.parseNodeLimit = Services.prefs.getIntPref("reader.parse-node-limit");
-  },
-
-  get isEnabledForParseOnLoad() {
-    delete this.isEnabledForParseOnLoad;
-
-    // Listen for future pref changes.
-    Services.prefs.addObserver("reader.parse-on-load.", this, false);
-
-    return this.isEnabledForParseOnLoad = this._getStateForParseOnLoad();
-  },
-
-  get isOnLowMemoryPlatform() {
-    let memory = Cc["@mozilla.org/xpcom/memory-service;1"].getService(Ci.nsIMemory);
-    delete this.isOnLowMemoryPlatform;
-    return this.isOnLowMemoryPlatform = memory.isLowMemoryPlatform();
-  },
-
-  _getStateForParseOnLoad: function () {
-    let isEnabled = Services.prefs.getBoolPref("reader.parse-on-load.enabled");
-    let isForceEnabled = Services.prefs.getBoolPref("reader.parse-on-load.force-enabled");
-    // For low-memory devices, don't allow reader mode since it takes up a lot of memory.
-    // See https://bugzilla.mozilla.org/show_bug.cgi?id=792603 for details.
-    return isForceEnabled || (isEnabled && !this.isOnLowMemoryPlatform);
-  },
-
-  observe: function(aMessage, aTopic, aData) {
-    switch(aTopic) {
-      case "nsPref:changed":
-        if (aData.startsWith("reader.parse-on-load.")) {
-          this.isEnabledForParseOnLoad = this._getStateForParseOnLoad();
-        } else if (aData === "reader.parse-node-limit") {
-          this.parseNodeLimit = Services.prefs.getIntPref(aData);
-        }
-        break;
-    }
-  },
-
  /**
   * Returns original URL from an about:reader URL.
   *
@@ -111,39 +61,6 @@ this.ReaderMode = {
    }
  },

-  /**
-   * Decides whether or not a document is reader-able without parsing the whole thing.
-   *
-   * @param doc A document to parse.
-   * @return boolean Whether or not we should show the reader mode button.
-   */
-  isProbablyReaderable: function(doc) {
-    // Only care about 'real' HTML documents:
-    if (doc.mozSyntheticDocument || !(doc instanceof doc.defaultView.HTMLDocument)) {
-      return false;
-    }
-
-    let uri = Services.io.newURI(doc.location.href, null, null);
-    if (!this._shouldCheckUri(uri)) {
-      return false;
-    }
-
-    let utils = this.getUtilsForWin(doc.defaultView);
-    // We pass in a helper function to determine if a node is visible, because
-    // it uses gecko APIs that the engine-agnostic readability code can't rely
-    // upon.
-    return new Readability(uri, doc).isProbablyReaderable(this.isNodeVisible.bind(this, utils));
-  },
-
-  isNodeVisible: function(utils, node) {
-    let bounds = utils.getBoundsWithoutFlushing(node);
-    return bounds.height > 0 && bounds.width > 0;
-  },
-
-  getUtilsForWin: function(win) {
-    return win.QueryInterface(Ci.nsIInterfaceRequestor).getInterface(Ci.nsIDOMWindowUtils);
-  },
-
  /**
   * Gets an article from a loaded browser's document. This method will not attempt
   * to parse certain URIs (e.g. about: URIs).
@@ -154,7 +71,7 @@ this.ReaderMode = {
   */
  parseDocument: Task.async(function* (doc) {
    let uri = Services.io.newURI(doc.documentURI, null, null);
-    if (!this._shouldCheckUri(uri)) {
+    if (!Readerable.shouldCheckUri(uri)) {
      this.log("Reader mode disabled for URI");
      return null;
    }
@@ -171,12 +88,12 @@ this.ReaderMode = {
   */
  downloadAndParseDocument: Task.async(function* (url) {
    let uri = Services.io.newURI(url, null, null);
-    TelemetryStopwatch.start("READER_MODE_DOWNLOAD_MS");
+    //TelemetryStopwatch.start("READER_MODE_DOWNLOAD_MS");
    let doc = yield this._downloadDocument(url).catch(e => {
-      TelemetryStopwatch.finish("READER_MODE_DOWNLOAD_MS");
+      //TelemetryStopwatch.finish("READER_MODE_DOWNLOAD_MS");
      throw e;
    });
-    TelemetryStopwatch.finish("READER_MODE_DOWNLOAD_MS");
+    //TelemetryStopwatch.finish("READER_MODE_DOWNLOAD_MS");
    return yield this._readerParse(uri, doc);
  }),

@@ -306,39 +223,6 @@ this.ReaderMode = {
      dump("Reader: " + msg);
  },

-  _blockedHosts: [
-    "twitter.com",
-    "mail.google.com",
-    "github.com",
-    "reddit.com",
-  ],
-
-  _shouldCheckUri: function (uri) {
-    if (!(uri.schemeIs("http") || uri.schemeIs("https"))) {
-      this.log("Not parsing URI scheme: " + uri.scheme);
-      return false;
-    }
-
-    try {
-      uri.QueryInterface(Ci.nsIURL);
-    } catch (ex) {
-      // If this doesn't work, presumably the URL is not well-formed or something
-      return false;
-    }
-    // Sadly, some high-profile pages have false positives, so bail early for those:
-    let asciiHost = uri.asciiHost;
-    if (this._blockedHosts.some(blockedHost => asciiHost.endsWith(blockedHost))) {
-      return false;
-    }
-
-    if (!uri.filePath || uri.filePath == "/") {
-      this.log("Not parsing home page: " + uri.spec);
-      return false;
-    }
-
-    return true;
-  },
-
  /**
   * Attempts to parse a document into an article. Heavy lifting happens
   * in readerWorker.js.
@@ -349,16 +233,17 @@ this.ReaderMode = {
   * @resolves JS object representing the article, or null if no article is found.
   */
  _readerParse: Task.async(function* (uri, doc) {
-    let histogram = Services.telemetry.getHistogramById("READER_MODE_PARSE_RESULT");
+    //let histogram = Services.telemetry.getHistogramById("READER_MODE_PARSE_RESULT");
    if (this.parseNodeLimit) {
      let numTags = doc.getElementsByTagName("*").length;
      if (numTags > this.parseNodeLimit) {
        this.log("Aborting parse for " + uri.spec + "; " + numTags + " elements found");
-        histogram.add(PARSE_ERROR_TOO_MANY_ELEMENTS);
+        //histogram.add(PARSE_ERROR_TOO_MANY_ELEMENTS);
        return null;
      }
    }

+    let { documentURI } = doc;
    let uriParam = {
      spec: uri.spec,
      host: uri.host,
@@ -367,37 +252,39 @@ this.ReaderMode = {
      pathBase: Services.io.newURI(".", null, uri).spec
    };

-    TelemetryStopwatch.start("READER_MODE_SERIALIZE_DOM_MS");
+    //TelemetryStopwatch.start("READER_MODE_SERIALIZE_DOM_MS");
    let serializer = Cc["@mozilla.org/xmlextras/xmlserializer;1"].
                     createInstance(Ci.nsIDOMSerializer);
    let serializedDoc = serializer.serializeToString(doc);
-    TelemetryStopwatch.finish("READER_MODE_SERIALIZE_DOM_MS");
+    //TelemetryStopwatch.finish("READER_MODE_SERIALIZE_DOM_MS");

-    TelemetryStopwatch.start("READER_MODE_WORKER_PARSE_MS");
+    //TelemetryStopwatch.start("READER_MODE_WORKER_PARSE_MS");
    let article = null;
    try {
      article = yield ReaderWorker.post("parseDocument", [uriParam, serializedDoc]);
    } catch (e) {
      Cu.reportError("Error in ReaderWorker: " + e);
-      histogram.add(PARSE_ERROR_WORKER);
+      //histogram.add(PARSE_ERROR_WORKER);
    }
-    TelemetryStopwatch.finish("READER_MODE_WORKER_PARSE_MS");
+    //TelemetryStopwatch.finish("READER_MODE_WORKER_PARSE_MS");

    if (!article) {
      this.log("Worker did not return an article");
-      histogram.add(PARSE_ERROR_NO_ARTICLE);
+      //histogram.add(PARSE_ERROR_NO_ARTICLE);
      return null;
    }

-    // Readability returns a URI object, but we only care about the URL.
-    article.url = article.uri.spec;
+    // Readability returns a URI object based on the baseURI, but we only care
+    // about the original document's URL from now on. This also avoids spoofing
+    // attempts where the baseURI doesn't match the domain of the documentURI.
+    article.url = documentURI;
    delete article.uri;

    let flags = Ci.nsIDocumentEncoder.OutputSelectionOnly | Ci.nsIDocumentEncoder.OutputAbsoluteLinks;
    article.title = Cc["@mozilla.org/parserutils;1"].getService(Ci.nsIParserUtils)
                                                    .convertToPlainText(article.title, flags, 0);

-    histogram.add(PARSE_SUCCESS);
+    //histogram.add(PARSE_SUCCESS);
    return article;
  }),

--- a/toolkit/components/reader/Readerable.js
+++ b/toolkit/components/reader/Readerable.js
@@ -0,0 +1,90 @@
+// -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+"use strict";
+
+// This file and Readability-readerable.js are merged together into
+// Readerable.jsm.
+
+/* exported Readerable */
+/* import-globals-from Readability-readerable.js */
+
+const { classes: Cc, interfaces: Ci, utils: Cu } = Components;
+
+Cu.import("resource://gre/modules/Services.jsm");
+Cu.import("resource://gre/modules/XPCOMUtils.jsm");
+
+function isNodeVisible(node) {
+  return node.clientHeight > 0 && node.clientWidth > 0;
+}
+
+var Readerable = {
+  isEnabled: true,
+  isForceEnabled: false,
+
+  get isEnabledForParseOnLoad() {
+    return this.isEnabled || this.isForceEnabled;
+  },
+
+  /**
+   * Decides whether or not a document is reader-able without parsing the whole thing.
+   *
+   * @param doc A document to parse.
+   * @return boolean Whether or not we should show the reader mode button.
+   */
+  isProbablyReaderable(doc) {
+    // Only care about 'real' HTML documents:
+    if (doc.mozSyntheticDocument || !(doc instanceof doc.defaultView.HTMLDocument)) {
+      return false;
+    }
+
+    let uri = Services.io.newURI(doc.location.href, null, null);
+    if (!this.shouldCheckUri(uri)) {
+      return false;
+    }
+    return isProbablyReaderable(doc, isNodeVisible);
+  },
+
+   _blockedHosts: [
+    "amazon.com",
+    "github.com",
+    "mail.google.com",
+    "pinterest.com",
+    "reddit.com",
+    "twitter.com",
+    "youtube.com",
+  ],
+
+  shouldCheckUri(uri, isBaseUri = false) {
+    if (!(uri.schemeIs("http") || uri.schemeIs("https"))) {
+      return false;
+    }
+    if (!isBaseUri) {
+      // Sadly, some high-profile pages have false positives, so bail early for those:
+      let asciiHost = uri.asciiHost;
+      if (this._blockedHosts.some(blockedHost => asciiHost.endsWith(blockedHost))) {
+         return false;
+      }
+      if (uri.filePath == "/") {
+        return false;
+      }
+    }
+
+    return true;
+  },
+
+  observe: function(aMessage, aTopic, aData) {
+    switch(aTopic) {
+      case "nsPref:changed":
+        if (aData === "reader.parse-on-load.enabled") {
+          this.isEnabled = Services.prefs.getBoolPref(aData);
+        } else if (aData === "reader.parse-on-load.force-enabled") {
+          this.isForceEnabled = Services.prefs.getBoolPref(aData);
+        }
+        break;
+    }
+  }
+};
+Services.prefs.addObserver("reader.parse-on-load.", Readerable, false);
+
--- a/toolkit/components/reader/Readerable.jsm
+++ b/toolkit/components/reader/Readerable.jsm
@@ -0,0 +1,11 @@
+// -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+"use strict";
+
+var EXPORTED_SYMBOLS = ["Readerable"];
+
+#include Readability-readerable.js
+#include Readerable.js
+
--- a/toolkit/components/reader/moz.build
+++ b/toolkit/components/reader/moz.build
@@ -8,14 +8,18 @@ JAR_MANIFESTS += ['jar.mn']

 EXTRA_JS_MODULES += [
  'AboutReader.jsm',
-  'ReaderMode.jsm'
+  'ReaderMode.jsm',
+]
+
+EXTRA_PP_JS_MODULES += [
+  'Readerable.jsm',
 ]

 EXTRA_JS_MODULES.reader = [
  'JSDOMParser.js',
  'Readability.js',
  'ReaderWorker.js',
-  'ReaderWorker.jsm'
+  'ReaderWorker.jsm',
 ]

 with Files('**'):