#583: update Readability to latest tip; update glue to new API
This commit is contained in:
parent
cb5d8f86a7
commit
8bd4c09a76
|
@ -166,6 +166,7 @@ toolkit/components/places/**
|
|||
# Uses preprocessing
|
||||
toolkit/content/contentAreaUtils.js
|
||||
toolkit/components/jsdownloads/src/DownloadIntegration.jsm
|
||||
toolkit/components/reader/Readerable.jsm
|
||||
toolkit/components/search/nsSearchService.js
|
||||
toolkit/components/url-classifier/**
|
||||
toolkit/components/urlformatter/nsURLFormatter.js
|
||||
|
|
|
@ -24,6 +24,8 @@ XPCOMUtils.defineLazyModuleGetter(this, "AboutReader",
|
|||
"resource://gre/modules/AboutReader.jsm");
|
||||
XPCOMUtils.defineLazyModuleGetter(this, "ReaderMode",
|
||||
"resource://gre/modules/ReaderMode.jsm");
|
||||
XPCOMUtils.defineLazyModuleGetter(this, "Readerable",
|
||||
"resource://gre/modules/Readerable.jsm");
|
||||
XPCOMUtils.defineLazyGetter(this, "SimpleServiceDiscovery", function() {
|
||||
let ssdp = Cu.import("resource://gre/modules/SimpleServiceDiscovery.jsm", {}).SimpleServiceDiscovery;
|
||||
// Register targets
|
||||
|
@ -344,7 +346,7 @@ var AboutReaderListener = {
|
|||
* painted is not going to work.
|
||||
*/
|
||||
updateReaderButton: function(forceNonArticle) {
|
||||
if (!ReaderMode.isEnabledForParseOnLoad || this.isAboutReader ||
|
||||
if (!Readerable.isEnabledForParseOnLoad || this.isAboutReader ||
|
||||
!(content.document instanceof content.HTMLDocument) ||
|
||||
content.document.mozSyntheticDocument) {
|
||||
return;
|
||||
|
@ -385,7 +387,7 @@ var AboutReaderListener = {
|
|||
|
||||
// Only send updates when there are articles; there's no point updating with
|
||||
// |false| all the time.
|
||||
if (ReaderMode.isProbablyReaderable(content.document)) {
|
||||
if (Readerable.isProbablyReaderable(content.document)) {
|
||||
sendAsyncMessage("Reader:UpdateReaderButton", { isArticle: true });
|
||||
} else if (forceNonArticle) {
|
||||
sendAsyncMessage("Reader:UpdateReaderButton", { isArticle: false });
|
||||
|
|
|
@ -1,10 +1,4 @@
|
|||
/*
|
||||
* DO NOT MODIFY THIS FILE DIRECTLY!
|
||||
*
|
||||
* This is a shared library that is maintained in an external repo:
|
||||
* https://github.com/mozilla/readability
|
||||
*/
|
||||
|
||||
/*eslint-env es6:false*/
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
@ -33,10 +27,6 @@
|
|||
*/
|
||||
(function (global) {
|
||||
|
||||
function error(m) {
|
||||
dump("JSDOMParser error: " + m + "\n");
|
||||
}
|
||||
|
||||
// XML only defines these and the numeric ones:
|
||||
|
||||
var entityTable = {
|
||||
|
@ -463,16 +453,15 @@
|
|||
else
|
||||
this.children.push(newNode);
|
||||
}
|
||||
} else {
|
||||
} else if (oldNode.nodeType === Node.ELEMENT_NODE) {
|
||||
// new node is not an element node.
|
||||
// if the old one was, update its element siblings:
|
||||
if (oldNode.nodeType === Node.ELEMENT_NODE) {
|
||||
if (oldNode.previousElementSibling)
|
||||
oldNode.previousElementSibling.nextElementSibling = oldNode.nextElementSibling;
|
||||
if (oldNode.nextElementSibling)
|
||||
oldNode.nextElementSibling.previousElementSibling = oldNode.previousElementSibling;
|
||||
this.children.splice(this.children.indexOf(oldNode), 1);
|
||||
}
|
||||
if (oldNode.previousElementSibling)
|
||||
oldNode.previousElementSibling.nextElementSibling = oldNode.nextElementSibling;
|
||||
if (oldNode.nextElementSibling)
|
||||
oldNode.nextElementSibling.previousElementSibling = oldNode.previousElementSibling;
|
||||
this.children.splice(this.children.indexOf(oldNode), 1);
|
||||
|
||||
// If the old node wasn't an element, neither the new nor the old node was an element,
|
||||
// and the children array and its members shouldn't need any updating.
|
||||
}
|
||||
|
@ -492,8 +481,8 @@
|
|||
__JSDOMParser__: true,
|
||||
};
|
||||
|
||||
for (var i in nodeTypes) {
|
||||
Node[i] = Node.prototype[i] = nodeTypes[i];
|
||||
for (var nodeType in nodeTypes) {
|
||||
Node[nodeType] = Node.prototype[nodeType] = nodeTypes[nodeType];
|
||||
}
|
||||
|
||||
var Attribute = function (name, value) {
|
||||
|
@ -507,17 +496,9 @@
|
|||
},
|
||||
setValue: function(newValue) {
|
||||
this._value = newValue;
|
||||
delete this._decodedValue;
|
||||
},
|
||||
setDecodedValue: function(newValue) {
|
||||
this._value = encodeHTML(newValue);
|
||||
this._decodedValue = newValue;
|
||||
},
|
||||
getDecodedValue: function() {
|
||||
if (typeof this._decodedValue === "undefined") {
|
||||
this._decodedValue = (this._value && decodeHTML(this._value)) || "";
|
||||
}
|
||||
return this._decodedValue;
|
||||
getEncodedValue: function() {
|
||||
return encodeHTML(this._value);
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -562,9 +543,10 @@
|
|||
this._textContent = newText;
|
||||
delete this._innerHTML;
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
var Document = function () {
|
||||
var Document = function (url) {
|
||||
this.documentURI = url;
|
||||
this.styleSheets = [];
|
||||
this.childNodes = [];
|
||||
this.children = [];
|
||||
|
@ -604,9 +586,30 @@
|
|||
node.textContent = text;
|
||||
return node;
|
||||
},
|
||||
|
||||
get baseURI() {
|
||||
if (!this.hasOwnProperty("_baseURI")) {
|
||||
this._baseURI = this.documentURI;
|
||||
var baseElements = this.getElementsByTagName("base");
|
||||
var href = baseElements[0] && baseElements[0].getAttribute("href");
|
||||
if (href) {
|
||||
try {
|
||||
this._baseURI = (new URL(href, this._baseURI)).href;
|
||||
} catch (ex) {/* Just fall back to documentURI */}
|
||||
}
|
||||
}
|
||||
return this._baseURI;
|
||||
},
|
||||
};
|
||||
|
||||
var Element = function (tag) {
|
||||
// We use this to find the closing tag.
|
||||
this._matchingTag = tag;
|
||||
// We're explicitly a non-namespace aware parser, we just pretend it's all HTML.
|
||||
var lastColonIndex = tag.lastIndexOf(":");
|
||||
if (lastColonIndex != -1) {
|
||||
tag = tag.substring(lastColonIndex + 1);
|
||||
}
|
||||
this.attributes = [];
|
||||
this.childNodes = [];
|
||||
this.children = [];
|
||||
|
@ -655,6 +658,14 @@
|
|||
this.setAttribute("src", str);
|
||||
},
|
||||
|
||||
get srcset() {
|
||||
return this.getAttribute("srcset") || "";
|
||||
},
|
||||
|
||||
set srcset(str) {
|
||||
this.setAttribute("srcset", str);
|
||||
},
|
||||
|
||||
get nodeName() {
|
||||
return this.tagName;
|
||||
},
|
||||
|
@ -671,14 +682,14 @@
|
|||
for (var j = 0; j < child.attributes.length; j++) {
|
||||
var attr = child.attributes[j];
|
||||
// the attribute value will be HTML escaped.
|
||||
var val = attr.value;
|
||||
var val = attr.getEncodedValue();
|
||||
var quote = (val.indexOf('"') === -1 ? '"' : "'");
|
||||
arr.push(" " + attr.name + '=' + quote + val + quote);
|
||||
arr.push(" " + attr.name + "=" + quote + val + quote);
|
||||
}
|
||||
|
||||
if (child.localName in voidElems) {
|
||||
if (child.localName in voidElems && !child.childNodes.length) {
|
||||
// if this is a self-closing element, end it here
|
||||
arr.push(">");
|
||||
arr.push("/>");
|
||||
} else {
|
||||
// otherwise, add its children
|
||||
arr.push(">");
|
||||
|
@ -702,12 +713,13 @@
|
|||
set innerHTML(html) {
|
||||
var parser = new JSDOMParser();
|
||||
var node = parser.parse(html);
|
||||
for (var i = this.childNodes.length; --i >= 0;) {
|
||||
var i;
|
||||
for (i = this.childNodes.length; --i >= 0;) {
|
||||
this.childNodes[i].parentNode = null;
|
||||
}
|
||||
this.childNodes = node.childNodes;
|
||||
this.children = node.children;
|
||||
for (var i = this.childNodes.length; --i >= 0;) {
|
||||
for (i = this.childNodes.length; --i >= 0;) {
|
||||
this.childNodes[i].parentNode = this;
|
||||
}
|
||||
},
|
||||
|
@ -748,8 +760,9 @@
|
|||
getAttribute: function (name) {
|
||||
for (var i = this.attributes.length; --i >= 0;) {
|
||||
var attr = this.attributes[i];
|
||||
if (attr.name === name)
|
||||
return attr.getDecodedValue();
|
||||
if (attr.name === name) {
|
||||
return attr.value;
|
||||
}
|
||||
}
|
||||
return undefined;
|
||||
},
|
||||
|
@ -758,11 +771,11 @@
|
|||
for (var i = this.attributes.length; --i >= 0;) {
|
||||
var attr = this.attributes[i];
|
||||
if (attr.name === name) {
|
||||
attr.setDecodedValue(value);
|
||||
attr.setValue(value);
|
||||
return;
|
||||
}
|
||||
}
|
||||
this.attributes.push(new Attribute(name, encodeHTML(value)));
|
||||
this.attributes.push(new Attribute(name, value));
|
||||
},
|
||||
|
||||
removeAttribute: function (name) {
|
||||
|
@ -773,7 +786,13 @@
|
|||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
hasAttribute: function (name) {
|
||||
return this.attributes.some(function (attr) {
|
||||
return attr.name == name;
|
||||
});
|
||||
},
|
||||
};
|
||||
|
||||
var Style = function (node) {
|
||||
|
@ -831,7 +850,7 @@
|
|||
Style.prototype.__defineSetter__(jsName, function (value) {
|
||||
this.setStyle(cssName, value);
|
||||
});
|
||||
}) (styleMap[jsName]);
|
||||
})(styleMap[jsName]);
|
||||
}
|
||||
|
||||
var JSDOMParser = function () {
|
||||
|
@ -849,9 +868,16 @@
|
|||
// makeElementNode(), which saves us from having to allocate a new array
|
||||
// every time.
|
||||
this.retPair = [];
|
||||
|
||||
this.errorState = "";
|
||||
};
|
||||
|
||||
JSDOMParser.prototype = {
|
||||
error: function(m) {
|
||||
dump("JSDOMParser error: " + m + "\n");
|
||||
this.errorState += m + "\n";
|
||||
},
|
||||
|
||||
/**
|
||||
* Look at the next character without advancing the index.
|
||||
*/
|
||||
|
@ -906,14 +932,14 @@
|
|||
// After a '=', we should see a '"' for the attribute value
|
||||
var c = this.nextChar();
|
||||
if (c !== '"' && c !== "'") {
|
||||
error("Error reading attribute " + name + ", expecting '\"'");
|
||||
this.error("Error reading attribute " + name + ", expecting '\"'");
|
||||
return;
|
||||
}
|
||||
|
||||
// Read the attribute value (and consume the matching quote)
|
||||
var value = this.readString(c);
|
||||
|
||||
node.attributes.push(new Attribute(name, value));
|
||||
node.attributes.push(new Attribute(name, decodeHTML(value)));
|
||||
|
||||
return;
|
||||
},
|
||||
|
@ -938,7 +964,7 @@
|
|||
strBuf.push(c);
|
||||
c = this.nextChar();
|
||||
}
|
||||
var tag = strBuf.join('');
|
||||
var tag = strBuf.join("");
|
||||
|
||||
if (!tag)
|
||||
return false;
|
||||
|
@ -949,7 +975,9 @@
|
|||
while (c !== "/" && c !== ">") {
|
||||
if (c === undefined)
|
||||
return false;
|
||||
while (whitespace.indexOf(this.html[this.currentChar++]) != -1);
|
||||
while (whitespace.indexOf(this.html[this.currentChar++]) != -1) {
|
||||
// Advance cursor to first non-whitespace char.
|
||||
}
|
||||
this.currentChar--;
|
||||
c = this.nextChar();
|
||||
if (c !== "/" && c !== ">") {
|
||||
|
@ -959,19 +987,19 @@
|
|||
}
|
||||
|
||||
// If this is a self-closing tag, read '/>'
|
||||
var closed = tag in voidElems;
|
||||
var closed = false;
|
||||
if (c === "/") {
|
||||
closed = true;
|
||||
c = this.nextChar();
|
||||
if (c !== ">") {
|
||||
error("expected '>' to close " + tag);
|
||||
this.error("expected '>' to close " + tag);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
retPair[0] = node;
|
||||
retPair[1] = closed;
|
||||
return true
|
||||
return true;
|
||||
},
|
||||
|
||||
/**
|
||||
|
@ -1013,46 +1041,6 @@
|
|||
}
|
||||
},
|
||||
|
||||
readScript: function (node) {
|
||||
while (this.currentChar < this.html.length) {
|
||||
var c = this.nextChar();
|
||||
var nextC = this.peekNext();
|
||||
if (c === "<") {
|
||||
if (nextC === "!" || nextC === "?") {
|
||||
// We're still before the ! or ? that is starting this comment:
|
||||
this.currentChar++;
|
||||
node.appendChild(this.discardNextComment());
|
||||
continue;
|
||||
}
|
||||
if (nextC === "/" && this.html.substr(this.currentChar, 8 /*"/script>".length */).toLowerCase() == "/script>") {
|
||||
// Go back before the '<' so we find the end tag.
|
||||
this.currentChar--;
|
||||
// Done with this script tag, the caller will close:
|
||||
return;
|
||||
}
|
||||
}
|
||||
// Either c wasn't a '<' or it was but we couldn't find either a comment
|
||||
// or a closing script tag, so we should just parse as text until the next one
|
||||
// comes along:
|
||||
|
||||
var haveTextNode = node.lastChild && node.lastChild.nodeType === Node.TEXT_NODE;
|
||||
var textNode = haveTextNode ? node.lastChild : new Text();
|
||||
var n = this.html.indexOf("<", this.currentChar);
|
||||
// Decrement this to include the current character *afterwards* so we don't get stuck
|
||||
// looking for the same < all the time.
|
||||
this.currentChar--;
|
||||
if (n === -1) {
|
||||
textNode.innerHTML += this.html.substring(this.currentChar, this.html.length);
|
||||
this.currentChar = this.html.length;
|
||||
} else {
|
||||
textNode.innerHTML += this.html.substring(this.currentChar, n);
|
||||
this.currentChar = n;
|
||||
}
|
||||
if (!haveTextNode)
|
||||
node.appendChild(textNode);
|
||||
}
|
||||
},
|
||||
|
||||
discardNextComment: function() {
|
||||
if (this.match("--")) {
|
||||
this.discardTo("-->");
|
||||
|
@ -1083,18 +1071,31 @@
|
|||
return null;
|
||||
|
||||
// Read any text as Text node
|
||||
var textNode;
|
||||
if (c !== "<") {
|
||||
--this.currentChar;
|
||||
var node = new Text();
|
||||
textNode = new Text();
|
||||
var n = this.html.indexOf("<", this.currentChar);
|
||||
if (n === -1) {
|
||||
node.innerHTML = this.html.substring(this.currentChar, this.html.length);
|
||||
textNode.innerHTML = this.html.substring(this.currentChar, this.html.length);
|
||||
this.currentChar = this.html.length;
|
||||
} else {
|
||||
node.innerHTML = this.html.substring(this.currentChar, n);
|
||||
textNode.innerHTML = this.html.substring(this.currentChar, n);
|
||||
this.currentChar = n;
|
||||
}
|
||||
return node;
|
||||
return textNode;
|
||||
}
|
||||
|
||||
if (this.match("![CDATA[")) {
|
||||
var endChar = this.html.indexOf("]]>", this.currentChar);
|
||||
if (endChar === -1) {
|
||||
this.error("unclosed CDATA section");
|
||||
return null;
|
||||
}
|
||||
textNode = new Text();
|
||||
textNode.textContent = this.html.substring(this.currentChar, endChar);
|
||||
this.currentChar = endChar + ("]]>").length;
|
||||
return textNode;
|
||||
}
|
||||
|
||||
c = this.peekNext();
|
||||
|
@ -1127,14 +1128,10 @@
|
|||
|
||||
// If this isn't a void Element, read its child nodes
|
||||
if (!closed) {
|
||||
if (localName == "script") {
|
||||
this.readScript(node);
|
||||
} else {
|
||||
this.readChildren(node);
|
||||
}
|
||||
var closingTag = "</" + localName + ">";
|
||||
this.readChildren(node);
|
||||
var closingTag = "</" + node._matchingTag + ">";
|
||||
if (!this.match(closingTag)) {
|
||||
error("expected '" + closingTag + "'");
|
||||
this.error("expected '" + closingTag + "' and got " + this.html.substr(this.currentChar, closingTag.length));
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
@ -1158,9 +1155,9 @@
|
|||
/**
|
||||
* Parses an HTML string and returns a JS implementation of the Document.
|
||||
*/
|
||||
parse: function (html) {
|
||||
parse: function (html, url) {
|
||||
this.html = html;
|
||||
var doc = this.doc = new Document();
|
||||
var doc = this.doc = new Document(url);
|
||||
this.readChildren(doc);
|
||||
|
||||
// If this is an HTML document, remove root-level children except for the
|
||||
|
@ -1188,4 +1185,4 @@
|
|||
// Attach JSDOMParser to the global scope
|
||||
global.JSDOMParser = JSDOMParser;
|
||||
|
||||
}) (this);
|
||||
})(this);
|
||||
|
|
|
@ -0,0 +1,98 @@
|
|||
/* eslint-env es6:false */
|
||||
/* globals exports */
|
||||
/*
|
||||
* Copyright (c) 2010 Arc90 Inc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This code is heavily based on Arc90's readability.js (1.7.1) script
|
||||
* available at: http://code.google.com/p/arc90labs-readability
|
||||
*/
|
||||
|
||||
// Case-insensitive patterns matched against a node's "className id" string
// by isProbablyReaderable(): the first flags nodes that are unlikely to be
// article content, the second rescues nodes that may be content after all.
//
// NOTE: These two regular expressions are duplicated in
// Readability.js. Please keep both copies in sync.
var REGEXPS = {
  unlikelyCandidates: /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,

  okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i,
};
|
||||
|
||||
/**
 * Default visibility predicate for isProbablyReaderable().
 *
 * A node counts as hidden when its inline style has display "none", when
 * it has a "hidden" attribute, or when "aria-hidden" is set to "true".
 *
 * @param node Element-like object to inspect.
 * @return boolean false when any of the hiding conditions holds.
 */
function isNodeVisible(node) {
  // Have to null-check node.style to deal with SVG and MathML nodes.
  if (node.style && node.style.display == "none") {
    return false;
  }
  if (node.hasAttribute("hidden")) {
    return false;
  }
  if (node.hasAttribute("aria-hidden") && node.getAttribute("aria-hidden") == "true") {
    return false;
  }
  return true;
}
|
||||
|
||||
/**
 * Decides whether or not the document is reader-able without parsing the
 * whole thing.
 *
 * Candidate nodes are every <p> and <pre>, plus every <div> that directly
 * contains a <br>. Each visible candidate that is not rejected by the
 * class/id patterns, is not a paragraph inside a list item, and has at
 * least 140 characters of trimmed text adds sqrt(length - 140) to a
 * running score. The document is considered reader-able as soon as that
 * score exceeds 20.
 *
 * @param doc       Document-like object providing querySelectorAll().
 * @param isVisible Optional predicate (node) => boolean; defaults to
 *                  isNodeVisible.
 * @return boolean Whether or not we suspect Readability.parse() will
 *                 succeed at returning an article object.
 */
function isProbablyReaderable(doc, isVisible) {
  isVisible = isVisible || isNodeVisible;

  var nodes = doc.querySelectorAll("p, pre");

  // Some articles use line breaks instead of paragraphs, e.g.
  // <div>
  //   Sentences<br>
  //   <br>
  //   Sentences<br>
  // </div>
  // so the parents of such <br> nodes are added to the candidates. The Set
  // keeps a <div> holding several <br>s from being counted more than once.
  var brNodes = doc.querySelectorAll("div > br");
  if (brNodes.length) {
    var candidates = new Set(nodes);
    for (var b = 0; b < brNodes.length; b++) {
      candidates.add(brNodes[b].parentNode);
    }
    nodes = Array.from(candidates);
  }

  var score = 0;
  for (var i = 0, total = nodes.length; i < total; i++) {
    var node = nodes[i];

    if (!isVisible(node)) {
      continue;
    }

    var matchString = node.className + " " + node.id;
    var looksUnlikely = REGEXPS.unlikelyCandidates.test(matchString) &&
                        !REGEXPS.okMaybeItsACandidate.test(matchString);
    if (looksUnlikely) {
      continue;
    }

    if (node.matches("li p")) {
      continue;
    }

    var textContentLength = node.textContent.trim().length;
    if (textContentLength < 140) {
      continue;
    }

    score += Math.sqrt(textContentLength - 140);

    // Stop scanning as soon as the accumulated score crosses the threshold.
    if (score > 20) {
      return true;
    }
  }

  return false;
}

// Only publish through `exports` when such an object exists in this scope.
if (typeof exports === "object") {
  exports.isProbablyReaderable = isProbablyReaderable;
}
|
File diff suppressed because it is too large
Load Diff
|
@ -27,14 +27,8 @@ XPCOMUtils.defineLazyModuleGetter(this, "CommonUtils", "resource://services-comm
|
|||
XPCOMUtils.defineLazyModuleGetter(this, "OS", "resource://gre/modules/osfile.jsm");
|
||||
XPCOMUtils.defineLazyModuleGetter(this, "ReaderWorker", "resource://gre/modules/reader/ReaderWorker.jsm");
|
||||
XPCOMUtils.defineLazyModuleGetter(this, "Task", "resource://gre/modules/Task.jsm");
|
||||
XPCOMUtils.defineLazyModuleGetter(this, "TelemetryStopwatch", "resource://gre/modules/TelemetryStopwatch.jsm");
|
||||
|
||||
XPCOMUtils.defineLazyGetter(this, "Readability", function() {
|
||||
let scope = {};
|
||||
scope.dump = this.dump;
|
||||
Services.scriptloader.loadSubScript("resource://gre/modules/reader/Readability.js", scope);
|
||||
return scope["Readability"];
|
||||
});
|
||||
//XPCOMUtils.defineLazyModuleGetter(this, "TelemetryStopwatch", "resource://gre/modules/TelemetryStopwatch.jsm");
|
||||
XPCOMUtils.defineLazyModuleGetter(this, "Readerable", "resource://gre/modules/Readerable.jsm");
|
||||
|
||||
this.ReaderMode = {
|
||||
// Version of the cache schema.
|
||||
|
@ -42,50 +36,6 @@ this.ReaderMode = {
|
|||
|
||||
DEBUG: 0,
|
||||
|
||||
// Don't try to parse the page if it has too many elements (for memory and
|
||||
// performance reasons)
|
||||
get maxElemsToParse() {
|
||||
delete this.parseNodeLimit;
|
||||
|
||||
Services.prefs.addObserver("reader.parse-node-limit", this, false);
|
||||
return this.parseNodeLimit = Services.prefs.getIntPref("reader.parse-node-limit");
|
||||
},
|
||||
|
||||
get isEnabledForParseOnLoad() {
|
||||
delete this.isEnabledForParseOnLoad;
|
||||
|
||||
// Listen for future pref changes.
|
||||
Services.prefs.addObserver("reader.parse-on-load.", this, false);
|
||||
|
||||
return this.isEnabledForParseOnLoad = this._getStateForParseOnLoad();
|
||||
},
|
||||
|
||||
get isOnLowMemoryPlatform() {
|
||||
let memory = Cc["@mozilla.org/xpcom/memory-service;1"].getService(Ci.nsIMemory);
|
||||
delete this.isOnLowMemoryPlatform;
|
||||
return this.isOnLowMemoryPlatform = memory.isLowMemoryPlatform();
|
||||
},
|
||||
|
||||
_getStateForParseOnLoad: function () {
|
||||
let isEnabled = Services.prefs.getBoolPref("reader.parse-on-load.enabled");
|
||||
let isForceEnabled = Services.prefs.getBoolPref("reader.parse-on-load.force-enabled");
|
||||
// For low-memory devices, don't allow reader mode since it takes up a lot of memory.
|
||||
// See https://bugzilla.mozilla.org/show_bug.cgi?id=792603 for details.
|
||||
return isForceEnabled || (isEnabled && !this.isOnLowMemoryPlatform);
|
||||
},
|
||||
|
||||
observe: function(aMessage, aTopic, aData) {
|
||||
switch(aTopic) {
|
||||
case "nsPref:changed":
|
||||
if (aData.startsWith("reader.parse-on-load.")) {
|
||||
this.isEnabledForParseOnLoad = this._getStateForParseOnLoad();
|
||||
} else if (aData === "reader.parse-node-limit") {
|
||||
this.parseNodeLimit = Services.prefs.getIntPref(aData);
|
||||
}
|
||||
break;
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* Returns original URL from an about:reader URL.
|
||||
*
|
||||
|
@ -111,39 +61,6 @@ this.ReaderMode = {
|
|||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* Decides whether or not a document is reader-able without parsing the whole thing.
|
||||
*
|
||||
* @param doc A document to parse.
|
||||
* @return boolean Whether or not we should show the reader mode button.
|
||||
*/
|
||||
isProbablyReaderable: function(doc) {
|
||||
// Only care about 'real' HTML documents:
|
||||
if (doc.mozSyntheticDocument || !(doc instanceof doc.defaultView.HTMLDocument)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let uri = Services.io.newURI(doc.location.href, null, null);
|
||||
if (!this._shouldCheckUri(uri)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let utils = this.getUtilsForWin(doc.defaultView);
|
||||
// We pass in a helper function to determine if a node is visible, because
|
||||
// it uses gecko APIs that the engine-agnostic readability code can't rely
|
||||
// upon.
|
||||
return new Readability(uri, doc).isProbablyReaderable(this.isNodeVisible.bind(this, utils));
|
||||
},
|
||||
|
||||
isNodeVisible: function(utils, node) {
|
||||
let bounds = utils.getBoundsWithoutFlushing(node);
|
||||
return bounds.height > 0 && bounds.width > 0;
|
||||
},
|
||||
|
||||
getUtilsForWin: function(win) {
|
||||
return win.QueryInterface(Ci.nsIInterfaceRequestor).getInterface(Ci.nsIDOMWindowUtils);
|
||||
},
|
||||
|
||||
/**
|
||||
* Gets an article from a loaded browser's document. This method will not attempt
|
||||
* to parse certain URIs (e.g. about: URIs).
|
||||
|
@ -154,7 +71,7 @@ this.ReaderMode = {
|
|||
*/
|
||||
parseDocument: Task.async(function* (doc) {
|
||||
let uri = Services.io.newURI(doc.documentURI, null, null);
|
||||
if (!this._shouldCheckUri(uri)) {
|
||||
if (!Readerable.shouldCheckUri(uri)) {
|
||||
this.log("Reader mode disabled for URI");
|
||||
return null;
|
||||
}
|
||||
|
@ -171,12 +88,12 @@ this.ReaderMode = {
|
|||
*/
|
||||
downloadAndParseDocument: Task.async(function* (url) {
|
||||
let uri = Services.io.newURI(url, null, null);
|
||||
TelemetryStopwatch.start("READER_MODE_DOWNLOAD_MS");
|
||||
//TelemetryStopwatch.start("READER_MODE_DOWNLOAD_MS");
|
||||
let doc = yield this._downloadDocument(url).catch(e => {
|
||||
TelemetryStopwatch.finish("READER_MODE_DOWNLOAD_MS");
|
||||
//TelemetryStopwatch.finish("READER_MODE_DOWNLOAD_MS");
|
||||
throw e;
|
||||
});
|
||||
TelemetryStopwatch.finish("READER_MODE_DOWNLOAD_MS");
|
||||
//TelemetryStopwatch.finish("READER_MODE_DOWNLOAD_MS");
|
||||
return yield this._readerParse(uri, doc);
|
||||
}),
|
||||
|
||||
|
@ -306,39 +223,6 @@ this.ReaderMode = {
|
|||
dump("Reader: " + msg);
|
||||
},
|
||||
|
||||
_blockedHosts: [
|
||||
"twitter.com",
|
||||
"mail.google.com",
|
||||
"github.com",
|
||||
"reddit.com",
|
||||
],
|
||||
|
||||
_shouldCheckUri: function (uri) {
|
||||
if (!(uri.schemeIs("http") || uri.schemeIs("https"))) {
|
||||
this.log("Not parsing URI scheme: " + uri.scheme);
|
||||
return false;
|
||||
}
|
||||
|
||||
try {
|
||||
uri.QueryInterface(Ci.nsIURL);
|
||||
} catch (ex) {
|
||||
// If this doesn't work, presumably the URL is not well-formed or something
|
||||
return false;
|
||||
}
|
||||
// Sadly, some high-profile pages have false positives, so bail early for those:
|
||||
let asciiHost = uri.asciiHost;
|
||||
if (this._blockedHosts.some(blockedHost => asciiHost.endsWith(blockedHost))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!uri.filePath || uri.filePath == "/") {
|
||||
this.log("Not parsing home page: " + uri.spec);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
},
|
||||
|
||||
/**
|
||||
* Attempts to parse a document into an article. Heavy lifting happens
|
||||
* in ReaderWorker.js.
|
||||
|
@ -349,16 +233,17 @@ this.ReaderMode = {
|
|||
* @resolves JS object representing the article, or null if no article is found.
|
||||
*/
|
||||
_readerParse: Task.async(function* (uri, doc) {
|
||||
let histogram = Services.telemetry.getHistogramById("READER_MODE_PARSE_RESULT");
|
||||
//let histogram = Services.telemetry.getHistogramById("READER_MODE_PARSE_RESULT");
|
||||
if (this.parseNodeLimit) {
|
||||
let numTags = doc.getElementsByTagName("*").length;
|
||||
if (numTags > this.parseNodeLimit) {
|
||||
this.log("Aborting parse for " + uri.spec + "; " + numTags + " elements found");
|
||||
histogram.add(PARSE_ERROR_TOO_MANY_ELEMENTS);
|
||||
//histogram.add(PARSE_ERROR_TOO_MANY_ELEMENTS);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
let { documentURI } = doc;
|
||||
let uriParam = {
|
||||
spec: uri.spec,
|
||||
host: uri.host,
|
||||
|
@ -367,37 +252,39 @@ this.ReaderMode = {
|
|||
pathBase: Services.io.newURI(".", null, uri).spec
|
||||
};
|
||||
|
||||
TelemetryStopwatch.start("READER_MODE_SERIALIZE_DOM_MS");
|
||||
//TelemetryStopwatch.start("READER_MODE_SERIALIZE_DOM_MS");
|
||||
let serializer = Cc["@mozilla.org/xmlextras/xmlserializer;1"].
|
||||
createInstance(Ci.nsIDOMSerializer);
|
||||
let serializedDoc = serializer.serializeToString(doc);
|
||||
TelemetryStopwatch.finish("READER_MODE_SERIALIZE_DOM_MS");
|
||||
//TelemetryStopwatch.finish("READER_MODE_SERIALIZE_DOM_MS");
|
||||
|
||||
TelemetryStopwatch.start("READER_MODE_WORKER_PARSE_MS");
|
||||
//TelemetryStopwatch.start("READER_MODE_WORKER_PARSE_MS");
|
||||
let article = null;
|
||||
try {
|
||||
article = yield ReaderWorker.post("parseDocument", [uriParam, serializedDoc]);
|
||||
} catch (e) {
|
||||
Cu.reportError("Error in ReaderWorker: " + e);
|
||||
histogram.add(PARSE_ERROR_WORKER);
|
||||
//histogram.add(PARSE_ERROR_WORKER);
|
||||
}
|
||||
TelemetryStopwatch.finish("READER_MODE_WORKER_PARSE_MS");
|
||||
//TelemetryStopwatch.finish("READER_MODE_WORKER_PARSE_MS");
|
||||
|
||||
if (!article) {
|
||||
this.log("Worker did not return an article");
|
||||
histogram.add(PARSE_ERROR_NO_ARTICLE);
|
||||
//histogram.add(PARSE_ERROR_NO_ARTICLE);
|
||||
return null;
|
||||
}
|
||||
|
||||
// Readability returns a URI object, but we only care about the URL.
|
||||
article.url = article.uri.spec;
|
||||
// Readability returns a URI object based on the baseURI, but we only care
|
||||
// about the original document's URL from now on. This also avoids spoofing
|
||||
// attempts where the baseURI doesn't match the domain of the documentURI
|
||||
article.url = documentURI;
|
||||
delete article.uri;
|
||||
|
||||
let flags = Ci.nsIDocumentEncoder.OutputSelectionOnly | Ci.nsIDocumentEncoder.OutputAbsoluteLinks;
|
||||
article.title = Cc["@mozilla.org/parserutils;1"].getService(Ci.nsIParserUtils)
|
||||
.convertToPlainText(article.title, flags, 0);
|
||||
|
||||
histogram.add(PARSE_SUCCESS);
|
||||
//histogram.add(PARSE_SUCCESS);
|
||||
return article;
|
||||
}),
|
||||
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
// -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
"use strict";
|
||||
|
||||
// This file and Readability-readerable.js are merged together into
|
||||
// Readerable.jsm.
|
||||
|
||||
/* exported Readerable */
|
||||
/* import-globals-from Readability-readerable.js */
|
||||
|
||||
const { classes: Cc, interfaces: Ci, utils: Cu } = Components;
|
||||
|
||||
Cu.import("resource://gre/modules/Services.jsm");
|
||||
Cu.import("resource://gre/modules/XPCOMUtils.jsm");
|
||||
|
||||
/**
 * Visibility predicate handed to isProbablyReaderable(): a node counts as
 * visible only when both its clientHeight and clientWidth are positive.
 *
 * @param node Element-like object exposing clientHeight / clientWidth.
 * @return boolean
 */
function isNodeVisible(node) {
  if (!(node.clientHeight > 0)) {
    return false;
  }
  return node.clientWidth > 0;
}
|
||||
|
||||
/**
 * Gecko-side glue around the engine-agnostic isProbablyReaderable() check:
 * filters out documents and URIs that should never be offered reader mode
 * and tracks the "reader.parse-on-load.*" preferences.
 */
var Readerable = {
  // Cached values of "reader.parse-on-load.enabled" and
  // "reader.parse-on-load.force-enabled"; refreshed by observe().
  isEnabled: true,
  isForceEnabled: false,

  /**
   * Whether pages should be checked for readerability when they load.
   */
  get isEnabledForParseOnLoad() {
    return this.isEnabled || this.isForceEnabled;
  },

  /**
   * Decides whether or not a document is reader-able without parsing the whole thing.
   *
   * @param doc A document to parse.
   * @return boolean Whether or not we should show the reader mode button.
   */
  isProbablyReaderable(doc) {
    // Only care about 'real' HTML documents:
    if (doc.mozSyntheticDocument || !(doc instanceof doc.defaultView.HTMLDocument)) {
      return false;
    }

    let uri = Services.io.newURI(doc.location.href, null, null);
    if (!this.shouldCheckUri(uri)) {
      return false;
    }

    return isProbablyReaderable(doc, isNodeVisible);
  },

  // Hosts (and their subdomains) that are never checked.
  _blockedHosts: [
    "amazon.com",
    "github.com",
    "mail.google.com",
    "pinterest.com",
    "reddit.com",
    "twitter.com",
    "youtube.com",
  ],

  /**
   * Decides whether a URI is worth checking for readerability.
   *
   * @param uri       URI object exposing schemeIs(), asciiHost and filePath.
   * @param isBaseUri When true, only the scheme is checked; the blocked-host
   *                  and home-page filters are skipped.
   * @return boolean false for non-http(s) URIs and, unless isBaseUri is set,
   *                 for blocked hosts and for the root path "/".
   */
  shouldCheckUri(uri, isBaseUri = false) {
    if (!(uri.schemeIs("http") || uri.schemeIs("https"))) {
      return false;
    }

    if (!isBaseUri) {
      // Sadly, some high-profile pages have false positives, so bail early for those.
      // Match the host itself or a true subdomain only: a bare suffix test
      // would also block unrelated hosts such as "nottwitter.com".
      let asciiHost = uri.asciiHost;
      let isBlocked = this._blockedHosts.some(blockedHost =>
        asciiHost == blockedHost || asciiHost.endsWith("." + blockedHost));
      if (isBlocked) {
        return false;
      }

      // Home pages are not treated as articles.
      if (uri.filePath == "/") {
        return false;
      }
    }

    return true;
  },

  /**
   * Preference observer: re-reads the changed "reader.parse-on-load.*"
   * boolean pref into the matching cached flag. Other topics and pref
   * names are ignored.
   */
  observe: function(aMessage, aTopic, aData) {
    switch(aTopic) {
      case "nsPref:changed":
        if (aData === "reader.parse-on-load.enabled") {
          this.isEnabled = Services.prefs.getBoolPref(aData);
        } else if (aData === "reader.parse-on-load.force-enabled") {
          this.isForceEnabled = Services.prefs.getBoolPref(aData);
        }
        break;
    }
  }
};
|
||||
// Seed the cached flags from the current pref values before listening for
// changes. The observer is only told about later modifications, so without
// this a pref that already differs from the hard-coded defaults would be
// ignored until it changed again.
["reader.parse-on-load.enabled", "reader.parse-on-load.force-enabled"].forEach(prefName => {
  try {
    Readerable.observe(null, "nsPref:changed", prefName);
  } catch (ex) {
    // Pref could not be read: keep the built-in default for that flag.
    Cu.reportError(ex);
  }
});
Services.prefs.addObserver("reader.parse-on-load.", Readerable, false);
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
// -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
"use strict";
|
||||
|
||||
var EXPORTED_SYMBOLS = ["Readerable"];
|
||||
|
||||
#include Readability-readerable.js
|
||||
#include Readerable.js
|
||||
|
|
@ -8,14 +8,18 @@ JAR_MANIFESTS += ['jar.mn']
|
|||
|
||||
EXTRA_JS_MODULES += [
|
||||
'AboutReader.jsm',
|
||||
'ReaderMode.jsm'
|
||||
'ReaderMode.jsm',
|
||||
]
|
||||
|
||||
EXTRA_PP_JS_MODULES += [
|
||||
'Readerable.jsm',
|
||||
]
|
||||
|
||||
EXTRA_JS_MODULES.reader = [
|
||||
'JSDOMParser.js',
|
||||
'Readability.js',
|
||||
'ReaderWorker.js',
|
||||
'ReaderWorker.jsm'
|
||||
'ReaderWorker.jsm',
|
||||
]
|
||||
|
||||
with Files('**'):
|
||||
|
|
Loading…
Reference in New Issue