#583: update Readability to latest tip; update glue to new API

This commit is contained in:
Cameron Kaiser 2019-12-19 22:28:55 -08:00
parent cb5d8f86a7
commit 8bd4c09a76
9 changed files with 1143 additions and 1021 deletions

View File

@ -166,6 +166,7 @@ toolkit/components/places/**
# Uses preprocessing
toolkit/content/contentAreaUtils.js
toolkit/components/jsdownloads/src/DownloadIntegration.jsm
toolkit/components/reader/Readerable.jsm
toolkit/components/search/nsSearchService.js
toolkit/components/url-classifier/**
toolkit/components/urlformatter/nsURLFormatter.js

View File

@ -24,6 +24,8 @@ XPCOMUtils.defineLazyModuleGetter(this, "AboutReader",
"resource://gre/modules/AboutReader.jsm");
XPCOMUtils.defineLazyModuleGetter(this, "ReaderMode",
"resource://gre/modules/ReaderMode.jsm");
XPCOMUtils.defineLazyModuleGetter(this, "Readerable",
"resource://gre/modules/Readerable.jsm");
XPCOMUtils.defineLazyGetter(this, "SimpleServiceDiscovery", function() {
let ssdp = Cu.import("resource://gre/modules/SimpleServiceDiscovery.jsm", {}).SimpleServiceDiscovery;
// Register targets
@ -344,7 +346,7 @@ var AboutReaderListener = {
* painted is not going to work.
*/
updateReaderButton: function(forceNonArticle) {
if (!ReaderMode.isEnabledForParseOnLoad || this.isAboutReader ||
if (!Readerable.isEnabledForParseOnLoad || this.isAboutReader ||
!(content.document instanceof content.HTMLDocument) ||
content.document.mozSyntheticDocument) {
return;
@ -385,7 +387,7 @@ var AboutReaderListener = {
// Only send updates when there are articles; there's no point updating with
// |false| all the time.
if (ReaderMode.isProbablyReaderable(content.document)) {
if (Readerable.isProbablyReaderable(content.document)) {
sendAsyncMessage("Reader:UpdateReaderButton", { isArticle: true });
} else if (forceNonArticle) {
sendAsyncMessage("Reader:UpdateReaderButton", { isArticle: false });

View File

@ -1,10 +1,4 @@
/*
* DO NOT MODIFY THIS FILE DIRECTLY!
*
* This is a shared library that is maintained in an external repo:
* https://github.com/mozilla/readability
*/
/*eslint-env es6:false*/
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
@ -33,10 +27,6 @@
*/
(function (global) {
function error(m) {
dump("JSDOMParser error: " + m + "\n");
}
// XML only defines these and the numeric ones:
var entityTable = {
@ -463,16 +453,15 @@
else
this.children.push(newNode);
}
} else {
} else if (oldNode.nodeType === Node.ELEMENT_NODE) {
// new node is not an element node.
// if the old one was, update its element siblings:
if (oldNode.nodeType === Node.ELEMENT_NODE) {
if (oldNode.previousElementSibling)
oldNode.previousElementSibling.nextElementSibling = oldNode.nextElementSibling;
if (oldNode.nextElementSibling)
oldNode.nextElementSibling.previousElementSibling = oldNode.previousElementSibling;
this.children.splice(this.children.indexOf(oldNode), 1);
}
if (oldNode.previousElementSibling)
oldNode.previousElementSibling.nextElementSibling = oldNode.nextElementSibling;
if (oldNode.nextElementSibling)
oldNode.nextElementSibling.previousElementSibling = oldNode.previousElementSibling;
this.children.splice(this.children.indexOf(oldNode), 1);
// If the old node wasn't an element, neither the new nor the old node was an element,
// and the children array and its members shouldn't need any updating.
}
@ -492,8 +481,8 @@
__JSDOMParser__: true,
};
for (var i in nodeTypes) {
Node[i] = Node.prototype[i] = nodeTypes[i];
for (var nodeType in nodeTypes) {
Node[nodeType] = Node.prototype[nodeType] = nodeTypes[nodeType];
}
var Attribute = function (name, value) {
@ -507,17 +496,9 @@
},
setValue: function(newValue) {
this._value = newValue;
delete this._decodedValue;
},
setDecodedValue: function(newValue) {
this._value = encodeHTML(newValue);
this._decodedValue = newValue;
},
getDecodedValue: function() {
if (typeof this._decodedValue === "undefined") {
this._decodedValue = (this._value && decodeHTML(this._value)) || "";
}
return this._decodedValue;
getEncodedValue: function() {
return encodeHTML(this._value);
},
};
@ -562,9 +543,10 @@
this._textContent = newText;
delete this._innerHTML;
},
}
};
var Document = function () {
var Document = function (url) {
this.documentURI = url;
this.styleSheets = [];
this.childNodes = [];
this.children = [];
@ -604,9 +586,30 @@
node.textContent = text;
return node;
},
get baseURI() {
if (!this.hasOwnProperty("_baseURI")) {
this._baseURI = this.documentURI;
var baseElements = this.getElementsByTagName("base");
var href = baseElements[0] && baseElements[0].getAttribute("href");
if (href) {
try {
this._baseURI = (new URL(href, this._baseURI)).href;
} catch (ex) {/* Just fall back to documentURI */}
}
}
return this._baseURI;
},
};
var Element = function (tag) {
// We use this to find the closing tag.
this._matchingTag = tag;
// We're explicitly a non-namespace aware parser, we just pretend it's all HTML.
var lastColonIndex = tag.lastIndexOf(":");
if (lastColonIndex != -1) {
tag = tag.substring(lastColonIndex + 1);
}
this.attributes = [];
this.childNodes = [];
this.children = [];
@ -655,6 +658,14 @@
this.setAttribute("src", str);
},
get srcset() {
return this.getAttribute("srcset") || "";
},
set srcset(str) {
this.setAttribute("srcset", str);
},
get nodeName() {
return this.tagName;
},
@ -671,14 +682,14 @@
for (var j = 0; j < child.attributes.length; j++) {
var attr = child.attributes[j];
// the attribute value will be HTML escaped.
var val = attr.value;
var val = attr.getEncodedValue();
var quote = (val.indexOf('"') === -1 ? '"' : "'");
arr.push(" " + attr.name + '=' + quote + val + quote);
arr.push(" " + attr.name + "=" + quote + val + quote);
}
if (child.localName in voidElems) {
if (child.localName in voidElems && !child.childNodes.length) {
// if this is a self-closing element, end it here
arr.push(">");
arr.push("/>");
} else {
// otherwise, add its children
arr.push(">");
@ -702,12 +713,13 @@
set innerHTML(html) {
var parser = new JSDOMParser();
var node = parser.parse(html);
for (var i = this.childNodes.length; --i >= 0;) {
var i;
for (i = this.childNodes.length; --i >= 0;) {
this.childNodes[i].parentNode = null;
}
this.childNodes = node.childNodes;
this.children = node.children;
for (var i = this.childNodes.length; --i >= 0;) {
for (i = this.childNodes.length; --i >= 0;) {
this.childNodes[i].parentNode = this;
}
},
@ -748,8 +760,9 @@
getAttribute: function (name) {
for (var i = this.attributes.length; --i >= 0;) {
var attr = this.attributes[i];
if (attr.name === name)
return attr.getDecodedValue();
if (attr.name === name) {
return attr.value;
}
}
return undefined;
},
@ -758,11 +771,11 @@
for (var i = this.attributes.length; --i >= 0;) {
var attr = this.attributes[i];
if (attr.name === name) {
attr.setDecodedValue(value);
attr.setValue(value);
return;
}
}
this.attributes.push(new Attribute(name, encodeHTML(value)));
this.attributes.push(new Attribute(name, value));
},
removeAttribute: function (name) {
@ -773,7 +786,13 @@
break;
}
}
}
},
hasAttribute: function (name) {
return this.attributes.some(function (attr) {
return attr.name == name;
});
},
};
var Style = function (node) {
@ -831,7 +850,7 @@
Style.prototype.__defineSetter__(jsName, function (value) {
this.setStyle(cssName, value);
});
}) (styleMap[jsName]);
})(styleMap[jsName]);
}
var JSDOMParser = function () {
@ -849,9 +868,16 @@
// makeElementNode(), which saves us from having to allocate a new array
// every time.
this.retPair = [];
this.errorState = "";
};
JSDOMParser.prototype = {
error: function(m) {
dump("JSDOMParser error: " + m + "\n");
this.errorState += m + "\n";
},
/**
* Look at the next character without advancing the index.
*/
@ -906,14 +932,14 @@
// After a '=', we should see a '"' for the attribute value
var c = this.nextChar();
if (c !== '"' && c !== "'") {
error("Error reading attribute " + name + ", expecting '\"'");
this.error("Error reading attribute " + name + ", expecting '\"'");
return;
}
// Read the attribute value (and consume the matching quote)
var value = this.readString(c);
node.attributes.push(new Attribute(name, value));
node.attributes.push(new Attribute(name, decodeHTML(value)));
return;
},
@ -938,7 +964,7 @@
strBuf.push(c);
c = this.nextChar();
}
var tag = strBuf.join('');
var tag = strBuf.join("");
if (!tag)
return false;
@ -949,7 +975,9 @@
while (c !== "/" && c !== ">") {
if (c === undefined)
return false;
while (whitespace.indexOf(this.html[this.currentChar++]) != -1);
while (whitespace.indexOf(this.html[this.currentChar++]) != -1) {
// Advance cursor to first non-whitespace char.
}
this.currentChar--;
c = this.nextChar();
if (c !== "/" && c !== ">") {
@ -959,19 +987,19 @@
}
// If this is a self-closing tag, read '/>'
var closed = tag in voidElems;
var closed = false;
if (c === "/") {
closed = true;
c = this.nextChar();
if (c !== ">") {
error("expected '>' to close " + tag);
this.error("expected '>' to close " + tag);
return false;
}
}
retPair[0] = node;
retPair[1] = closed;
return true
return true;
},
/**
@ -1013,46 +1041,6 @@
}
},
readScript: function (node) {
while (this.currentChar < this.html.length) {
var c = this.nextChar();
var nextC = this.peekNext();
if (c === "<") {
if (nextC === "!" || nextC === "?") {
// We're still before the ! or ? that is starting this comment:
this.currentChar++;
node.appendChild(this.discardNextComment());
continue;
}
if (nextC === "/" && this.html.substr(this.currentChar, 8 /*"/script>".length */).toLowerCase() == "/script>") {
// Go back before the '<' so we find the end tag.
this.currentChar--;
// Done with this script tag, the caller will close:
return;
}
}
// Either c wasn't a '<' or it was but we couldn't find either a comment
// or a closing script tag, so we should just parse as text until the next one
// comes along:
var haveTextNode = node.lastChild && node.lastChild.nodeType === Node.TEXT_NODE;
var textNode = haveTextNode ? node.lastChild : new Text();
var n = this.html.indexOf("<", this.currentChar);
// Decrement this to include the current character *afterwards* so we don't get stuck
// looking for the same < all the time.
this.currentChar--;
if (n === -1) {
textNode.innerHTML += this.html.substring(this.currentChar, this.html.length);
this.currentChar = this.html.length;
} else {
textNode.innerHTML += this.html.substring(this.currentChar, n);
this.currentChar = n;
}
if (!haveTextNode)
node.appendChild(textNode);
}
},
discardNextComment: function() {
if (this.match("--")) {
this.discardTo("-->");
@ -1083,18 +1071,31 @@
return null;
// Read any text as Text node
var textNode;
if (c !== "<") {
--this.currentChar;
var node = new Text();
textNode = new Text();
var n = this.html.indexOf("<", this.currentChar);
if (n === -1) {
node.innerHTML = this.html.substring(this.currentChar, this.html.length);
textNode.innerHTML = this.html.substring(this.currentChar, this.html.length);
this.currentChar = this.html.length;
} else {
node.innerHTML = this.html.substring(this.currentChar, n);
textNode.innerHTML = this.html.substring(this.currentChar, n);
this.currentChar = n;
}
return node;
return textNode;
}
if (this.match("![CDATA[")) {
var endChar = this.html.indexOf("]]>", this.currentChar);
if (endChar === -1) {
this.error("unclosed CDATA section");
return null;
}
textNode = new Text();
textNode.textContent = this.html.substring(this.currentChar, endChar);
this.currentChar = endChar + ("]]>").length;
return textNode;
}
c = this.peekNext();
@ -1127,14 +1128,10 @@
// If this isn't a void Element, read its child nodes
if (!closed) {
if (localName == "script") {
this.readScript(node);
} else {
this.readChildren(node);
}
var closingTag = "</" + localName + ">";
this.readChildren(node);
var closingTag = "</" + node._matchingTag + ">";
if (!this.match(closingTag)) {
error("expected '" + closingTag + "'");
this.error("expected '" + closingTag + "' and got " + this.html.substr(this.currentChar, closingTag.length));
return null;
}
}
@ -1158,9 +1155,9 @@
/**
* Parses an HTML string and returns a JS implementation of the Document.
*/
parse: function (html) {
parse: function (html, url) {
this.html = html;
var doc = this.doc = new Document();
var doc = this.doc = new Document(url);
this.readChildren(doc);
// If this is an HTML document, remove root-level children except for the
@ -1188,4 +1185,4 @@
// Attach JSDOMParser to the global scope
global.JSDOMParser = JSDOMParser;
}) (this);
})(this);

View File

@ -0,0 +1,98 @@
/* eslint-env es6:false */
/* globals exports */
/*
* Copyright (c) 2010 Arc90 Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This code is heavily based on Arc90's readability.js (1.7.1) script
* available at: http://code.google.com/p/arc90labs-readability
*/
// Patterns that isProbablyReaderable() tests against a node's
// "<className> <id>" string: a node that matches unlikelyCandidates is
// skipped unless it also matches okMaybeItsACandidate. Both are
// case-insensitive and unanchored, so they match anywhere in the string.
var REGEXPS = {
// NOTE: These two regular expressions are duplicated in
// Readability.js. Please keep both copies in sync.
unlikelyCandidates: /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,
okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i,
};
/**
 * Default, engine-agnostic visibility heuristic.
 *
 * A node counts as hidden when its inline style has display "none", when it
 * carries a `hidden` attribute, or when `aria-hidden` is exactly "true".
 *
 * @param node Element-like object exposing hasAttribute()/getAttribute()
 *             and, optionally, a style object.
 * @return boolean true when none of the "hidden" signals are present.
 */
function isNodeVisible(node) {
  // SVG and MathML nodes may have no style object, so check it exists first.
  if (node.style && node.style.display == "none") {
    return false;
  }
  if (node.hasAttribute("hidden")) {
    return false;
  }
  var hiddenFromAria = node.hasAttribute("aria-hidden") &&
                       node.getAttribute("aria-hidden") == "true";
  return !hiddenFromAria;
}
/**
 * Decides whether or not the document is reader-able without parsing the whole thing.
 *
 * Candidate nodes are every <p> and <pre>, plus the parent of every <br> that
 * is a direct child of a <div>. Each visible, plausible candidate with enough
 * text adds sqrt(textLength - minContentLength) to a running score; the
 * document is considered reader-able as soon as that score exceeds minScore.
 *
 * @param doc      Document-like object; must provide querySelectorAll().
 * @param options  Optional. Either a function (legacy form: used as the
 *                 visibility checker) or an object with any of:
 *                   minContentLength   minimum trimmed text length a node
 *                                      needs in order to count (default 140)
 *                   minScore           cumulative score that must be exceeded
 *                                      (default 20)
 *                   visibilityChecker  function(node) -> boolean
 *                                      (default isNodeVisible)
 * @return boolean Whether or not we suspect Readability.parse() will succeed at returning an article object.
 */
function isProbablyReaderable(doc, options) {
  // Older callers pass the visibility checker directly as the second
  // argument; normalise that to the options-object form without mutating
  // the caller's argument.
  var opts;
  if (typeof options == "function") {
    opts = { visibilityChecker: options };
  } else {
    opts = options || {};
  }
  var minContentLength = typeof opts.minContentLength == "number" ? opts.minContentLength : 140;
  var minScore = typeof opts.minScore == "number" ? opts.minScore : 20;
  var isVisible = opts.visibilityChecker || isNodeVisible;

  var nodes = doc.querySelectorAll("p, pre");

  // Get <div> nodes which have <br> node(s) and append them into the `nodes` variable.
  // Some articles' DOM structures might look like
  // <div>
  //   Sentences<br>
  //   <br>
  //   Sentences<br>
  // </div>
  var brNodes = doc.querySelectorAll("div > br");
  if (brNodes.length) {
    // A Set de-duplicates parents that contain more than one <br>.
    var set = new Set(nodes);
    [].forEach.call(brNodes, function(node) {
      set.add(node.parentNode);
    });
    nodes = Array.from(set);
  }

  var score = 0;
  // some() stops at the first callback that returns true, so the accumulator
  // 'score' doubles as the early-exit condition.
  return [].some.call(nodes, function(node) {
    if (!isVisible(node)) {
      return false;
    }

    var matchString = node.className + " " + node.id;
    if (REGEXPS.unlikelyCandidates.test(matchString) &&
        !REGEXPS.okMaybeItsACandidate.test(matchString)) {
      return false;
    }

    // Paragraphs nested inside list items are not counted.
    if (node.matches("li p")) {
      return false;
    }

    var textContentLength = node.textContent.trim().length;
    if (textContentLength < minContentLength) {
      return false;
    }

    score += Math.sqrt(textContentLength - minContentLength);
    return score > minScore;
  });
}
// Publish the function only when an `exports` object is in scope (e.g. a
// CommonJS-style loader); when the file is simply included into another
// script, `typeof exports` is not "object" and this is a no-op.
if (typeof exports === "object") {
exports.isProbablyReaderable = isProbablyReaderable;
}

File diff suppressed because it is too large Load Diff

View File

@ -27,14 +27,8 @@ XPCOMUtils.defineLazyModuleGetter(this, "CommonUtils", "resource://services-comm
XPCOMUtils.defineLazyModuleGetter(this, "OS", "resource://gre/modules/osfile.jsm");
XPCOMUtils.defineLazyModuleGetter(this, "ReaderWorker", "resource://gre/modules/reader/ReaderWorker.jsm");
XPCOMUtils.defineLazyModuleGetter(this, "Task", "resource://gre/modules/Task.jsm");
XPCOMUtils.defineLazyModuleGetter(this, "TelemetryStopwatch", "resource://gre/modules/TelemetryStopwatch.jsm");
XPCOMUtils.defineLazyGetter(this, "Readability", function() {
let scope = {};
scope.dump = this.dump;
Services.scriptloader.loadSubScript("resource://gre/modules/reader/Readability.js", scope);
return scope["Readability"];
});
//XPCOMUtils.defineLazyModuleGetter(this, "TelemetryStopwatch", "resource://gre/modules/TelemetryStopwatch.jsm");
XPCOMUtils.defineLazyModuleGetter(this, "Readerable", "resource://gre/modules/Readerable.jsm");
this.ReaderMode = {
// Version of the cache schema.
@ -42,50 +36,6 @@ this.ReaderMode = {
DEBUG: 0,
// Don't try to parse the page if it has too many elements (for memory and
// performance reasons)
get maxElemsToParse() {
delete this.parseNodeLimit;
Services.prefs.addObserver("reader.parse-node-limit", this, false);
return this.parseNodeLimit = Services.prefs.getIntPref("reader.parse-node-limit");
},
get isEnabledForParseOnLoad() {
delete this.isEnabledForParseOnLoad;
// Listen for future pref changes.
Services.prefs.addObserver("reader.parse-on-load.", this, false);
return this.isEnabledForParseOnLoad = this._getStateForParseOnLoad();
},
get isOnLowMemoryPlatform() {
let memory = Cc["@mozilla.org/xpcom/memory-service;1"].getService(Ci.nsIMemory);
delete this.isOnLowMemoryPlatform;
return this.isOnLowMemoryPlatform = memory.isLowMemoryPlatform();
},
_getStateForParseOnLoad: function () {
let isEnabled = Services.prefs.getBoolPref("reader.parse-on-load.enabled");
let isForceEnabled = Services.prefs.getBoolPref("reader.parse-on-load.force-enabled");
// For low-memory devices, don't allow reader mode since it takes up a lot of memory.
// See https://bugzilla.mozilla.org/show_bug.cgi?id=792603 for details.
return isForceEnabled || (isEnabled && !this.isOnLowMemoryPlatform);
},
observe: function(aMessage, aTopic, aData) {
switch(aTopic) {
case "nsPref:changed":
if (aData.startsWith("reader.parse-on-load.")) {
this.isEnabledForParseOnLoad = this._getStateForParseOnLoad();
} else if (aData === "reader.parse-node-limit") {
this.parseNodeLimit = Services.prefs.getIntPref(aData);
}
break;
}
},
/**
* Returns original URL from an about:reader URL.
*
@ -111,39 +61,6 @@ this.ReaderMode = {
}
},
/**
* Decides whether or not a document is reader-able without parsing the whole thing.
*
* @param doc A document to parse.
* @return boolean Whether or not we should show the reader mode button.
*/
isProbablyReaderable: function(doc) {
// Only care about 'real' HTML documents:
if (doc.mozSyntheticDocument || !(doc instanceof doc.defaultView.HTMLDocument)) {
return false;
}
let uri = Services.io.newURI(doc.location.href, null, null);
if (!this._shouldCheckUri(uri)) {
return false;
}
let utils = this.getUtilsForWin(doc.defaultView);
// We pass in a helper function to determine if a node is visible, because
// it uses gecko APIs that the engine-agnostic readability code can't rely
// upon.
return new Readability(uri, doc).isProbablyReaderable(this.isNodeVisible.bind(this, utils));
},
isNodeVisible: function(utils, node) {
let bounds = utils.getBoundsWithoutFlushing(node);
return bounds.height > 0 && bounds.width > 0;
},
getUtilsForWin: function(win) {
return win.QueryInterface(Ci.nsIInterfaceRequestor).getInterface(Ci.nsIDOMWindowUtils);
},
/**
* Gets an article from a loaded browser's document. This method will not attempt
* to parse certain URIs (e.g. about: URIs).
@ -154,7 +71,7 @@ this.ReaderMode = {
*/
parseDocument: Task.async(function* (doc) {
let uri = Services.io.newURI(doc.documentURI, null, null);
if (!this._shouldCheckUri(uri)) {
if (!Readerable.shouldCheckUri(uri)) {
this.log("Reader mode disabled for URI");
return null;
}
@ -171,12 +88,12 @@ this.ReaderMode = {
*/
downloadAndParseDocument: Task.async(function* (url) {
let uri = Services.io.newURI(url, null, null);
TelemetryStopwatch.start("READER_MODE_DOWNLOAD_MS");
//TelemetryStopwatch.start("READER_MODE_DOWNLOAD_MS");
let doc = yield this._downloadDocument(url).catch(e => {
TelemetryStopwatch.finish("READER_MODE_DOWNLOAD_MS");
//TelemetryStopwatch.finish("READER_MODE_DOWNLOAD_MS");
throw e;
});
TelemetryStopwatch.finish("READER_MODE_DOWNLOAD_MS");
//TelemetryStopwatch.finish("READER_MODE_DOWNLOAD_MS");
return yield this._readerParse(uri, doc);
}),
@ -306,39 +223,6 @@ this.ReaderMode = {
dump("Reader: " + msg);
},
_blockedHosts: [
"twitter.com",
"mail.google.com",
"github.com",
"reddit.com",
],
_shouldCheckUri: function (uri) {
if (!(uri.schemeIs("http") || uri.schemeIs("https"))) {
this.log("Not parsing URI scheme: " + uri.scheme);
return false;
}
try {
uri.QueryInterface(Ci.nsIURL);
} catch (ex) {
// If this doesn't work, presumably the URL is not well-formed or something
return false;
}
// Sadly, some high-profile pages have false positives, so bail early for those:
let asciiHost = uri.asciiHost;
if (this._blockedHosts.some(blockedHost => asciiHost.endsWith(blockedHost))) {
return false;
}
if (!uri.filePath || uri.filePath == "/") {
this.log("Not parsing home page: " + uri.spec);
return false;
}
return true;
},
/**
* Attempts to parse a document into an article. Heavy lifting happens
* in readerWorker.js.
@ -349,16 +233,17 @@ this.ReaderMode = {
* @resolves JS object representing the article, or null if no article is found.
*/
_readerParse: Task.async(function* (uri, doc) {
let histogram = Services.telemetry.getHistogramById("READER_MODE_PARSE_RESULT");
//let histogram = Services.telemetry.getHistogramById("READER_MODE_PARSE_RESULT");
if (this.parseNodeLimit) {
let numTags = doc.getElementsByTagName("*").length;
if (numTags > this.parseNodeLimit) {
this.log("Aborting parse for " + uri.spec + "; " + numTags + " elements found");
histogram.add(PARSE_ERROR_TOO_MANY_ELEMENTS);
//histogram.add(PARSE_ERROR_TOO_MANY_ELEMENTS);
return null;
}
}
let { documentURI } = doc;
let uriParam = {
spec: uri.spec,
host: uri.host,
@ -367,37 +252,39 @@ this.ReaderMode = {
pathBase: Services.io.newURI(".", null, uri).spec
};
TelemetryStopwatch.start("READER_MODE_SERIALIZE_DOM_MS");
//TelemetryStopwatch.start("READER_MODE_SERIALIZE_DOM_MS");
let serializer = Cc["@mozilla.org/xmlextras/xmlserializer;1"].
createInstance(Ci.nsIDOMSerializer);
let serializedDoc = serializer.serializeToString(doc);
TelemetryStopwatch.finish("READER_MODE_SERIALIZE_DOM_MS");
//TelemetryStopwatch.finish("READER_MODE_SERIALIZE_DOM_MS");
TelemetryStopwatch.start("READER_MODE_WORKER_PARSE_MS");
//TelemetryStopwatch.start("READER_MODE_WORKER_PARSE_MS");
let article = null;
try {
article = yield ReaderWorker.post("parseDocument", [uriParam, serializedDoc]);
} catch (e) {
Cu.reportError("Error in ReaderWorker: " + e);
histogram.add(PARSE_ERROR_WORKER);
//histogram.add(PARSE_ERROR_WORKER);
}
TelemetryStopwatch.finish("READER_MODE_WORKER_PARSE_MS");
//TelemetryStopwatch.finish("READER_MODE_WORKER_PARSE_MS");
if (!article) {
this.log("Worker did not return an article");
histogram.add(PARSE_ERROR_NO_ARTICLE);
//histogram.add(PARSE_ERROR_NO_ARTICLE);
return null;
}
// Readability returns a URI object, but we only care about the URL.
article.url = article.uri.spec;
// Readability returns a URI object based on the baseURI, but we only care
// about the original document's URL from now on. This also avoids spoofing
// attempts where the baseURI doesn't match the domain of the documentURI
article.url = documentURI;
delete article.uri;
let flags = Ci.nsIDocumentEncoder.OutputSelectionOnly | Ci.nsIDocumentEncoder.OutputAbsoluteLinks;
article.title = Cc["@mozilla.org/parserutils;1"].getService(Ci.nsIParserUtils)
.convertToPlainText(article.title, flags, 0);
histogram.add(PARSE_SUCCESS);
//histogram.add(PARSE_SUCCESS);
return article;
}),

View File

@ -0,0 +1,90 @@
// -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
"use strict";
// This file and Readability-readerable.js are merged together into
// Readerable.jsm.
/* exported Readerable */
/* import-globals-from Readability-readerable.js */
const { classes: Cc, interfaces: Ci, utils: Cu } = Components;
Cu.import("resource://gre/modules/Services.jsm");
Cu.import("resource://gre/modules/XPCOMUtils.jsm");
function isNodeVisible(node) {
return node.clientHeight > 0 && node.clientWidth > 0;
}
var Readerable = {
isEnabled: true,
isForceEnabled: false,
get isEnabledForParseOnLoad() {
return this.isEnabled || this.isForceEnabled;
},
/**
* Decides whether or not a document is reader-able without parsing the whole thing.
*
* @param doc A document to parse.
* @return boolean Whether or not we should show the reader mode button.
*/
isProbablyReaderable(doc) {
// Only care about 'real' HTML documents:
if (doc.mozSyntheticDocument || !(doc instanceof doc.defaultView.HTMLDocument)) {
return false;
}
let uri = Services.io.newURI(doc.location.href, null, null);
if (!this.shouldCheckUri(uri)) {
return false;
}
return isProbablyReaderable(doc, isNodeVisible);
},
_blockedHosts: [
"amazon.com",
"github.com",
"mail.google.com",
"pinterest.com",
"reddit.com",
"twitter.com",
"youtube.com",
],
shouldCheckUri(uri, isBaseUri = false) {
if (!(uri.schemeIs("http") || uri.schemeIs("https"))) {
return false;
}
if (!isBaseUri) {
// Sadly, some high-profile pages have false positives, so bail early for those:
let asciiHost = uri.asciiHost;
if (this._blockedHosts.some(blockedHost => asciiHost.endsWith(blockedHost))) {
return false;
}
if (uri.filePath == "/") {
return false;
}
}
return true;
},
observe: function(aMessage, aTopic, aData) {
switch(aTopic) {
case "nsPref:changed":
if (aData === "reader.parse-on-load.enabled") {
this.isEnabled = Services.prefs.getBoolPref(aData);
} else if (aData === "reader.parse-on-load.force-enabled") {
this.isForceEnabled = Services.prefs.getBoolPref(aData);
}
break;
}
}
};
// Register Readerable as an observer on the "reader.parse-on-load." pref
// prefix so that Readerable.observe() is called when one of those prefs
// changes.
Services.prefs.addObserver("reader.parse-on-load.", Readerable, false);

View File

@ -0,0 +1,11 @@
// -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
"use strict";
// Only the Readerable object is exported from this module.
var EXPORTED_SYMBOLS = ["Readerable"];
// The two sources below are textually merged into this module by the
// preprocessor, in this order.
// NOTE(review): both files declare a top-level function named
// isNodeVisible; with hoisted function declarations the later one (from
// Readerable.js) is the one in effect for the merged module.
#include Readability-readerable.js
#include Readerable.js

View File

@ -8,14 +8,18 @@ JAR_MANIFESTS += ['jar.mn']
EXTRA_JS_MODULES += [
'AboutReader.jsm',
'ReaderMode.jsm'
'ReaderMode.jsm',
]
EXTRA_PP_JS_MODULES += [
'Readerable.jsm',
]
EXTRA_JS_MODULES.reader = [
'JSDOMParser.js',
'Readability.js',
'ReaderWorker.js',
'ReaderWorker.jsm'
'ReaderWorker.jsm',
]
with Files('**'):