mirror of
https://github.com/classilla/tenfourfox.git
synced 2025-01-15 09:30:20 +00:00
#595: pull Readability to tip
This commit is contained in:
parent
ce38568dfb
commit
2777050abd
@ -308,6 +308,7 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
getElems(this);
|
getElems(this);
|
||||||
|
elems._isLiveNodeList = true;
|
||||||
return elems;
|
return elems;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,9 +29,11 @@ var REGEXPS = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
function isNodeVisible(node) {
|
function isNodeVisible(node) {
|
||||||
// Have to null-check node.style to deal with SVG and MathML nodes.
|
// Have to null-check node.style and node.className.indexOf to deal with SVG and MathML nodes.
|
||||||
return (!node.style || node.style.display != "none") && !node.hasAttribute("hidden")
|
return (!node.style || node.style.display != "none")
|
||||||
&& (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true");
|
&& !node.hasAttribute("hidden")
|
||||||
|
//check for "fallback-image" so that wikimedia math images are displayed
|
||||||
|
&& (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || (node.className && node.className.indexOf && node.className.indexOf("fallback-image") !== -1));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -36,6 +36,7 @@ function Readability(doc, options) {
|
|||||||
options = options || {};
|
options = options || {};
|
||||||
|
|
||||||
this._doc = doc;
|
this._doc = doc;
|
||||||
|
this._docJSDOMParser = this._doc.firstChild.__JSDOMParser__;
|
||||||
this._articleTitle = null;
|
this._articleTitle = null;
|
||||||
this._articleByline = null;
|
this._articleByline = null;
|
||||||
this._articleDir = null;
|
this._articleDir = null;
|
||||||
@ -181,6 +182,10 @@ Readability.prototype = {
|
|||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
_removeNodes: function(nodeList, filterFn) {
|
_removeNodes: function(nodeList, filterFn) {
|
||||||
|
// Avoid ever operating on live node lists.
|
||||||
|
if (this._docJSDOMParser && nodeList._isLiveNodeList) {
|
||||||
|
throw new Error("Do not pass live node lists to _removeNodes");
|
||||||
|
}
|
||||||
for (var i = nodeList.length - 1; i >= 0; i--) {
|
for (var i = nodeList.length - 1; i >= 0; i--) {
|
||||||
var node = nodeList[i];
|
var node = nodeList[i];
|
||||||
var parentNode = node.parentNode;
|
var parentNode = node.parentNode;
|
||||||
@ -200,6 +205,10 @@ Readability.prototype = {
|
|||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
_replaceNodeTags: function(nodeList, newTagName) {
|
_replaceNodeTags: function(nodeList, newTagName) {
|
||||||
|
// Avoid ever operating on live node lists.
|
||||||
|
if (this._docJSDOMParser && nodeList._isLiveNodeList) {
|
||||||
|
throw new Error("Do not pass live node lists to _replaceNodeTags");
|
||||||
|
}
|
||||||
for (var i = nodeList.length - 1; i >= 0; i--) {
|
for (var i = nodeList.length - 1; i >= 0; i--) {
|
||||||
var node = nodeList[i];
|
var node = nodeList[i];
|
||||||
this._setNodeTag(node, newTagName);
|
this._setNodeTag(node, newTagName);
|
||||||
@ -332,11 +341,21 @@ Readability.prototype = {
|
|||||||
this._forEachNode(links, function(link) {
|
this._forEachNode(links, function(link) {
|
||||||
var href = link.getAttribute("href");
|
var href = link.getAttribute("href");
|
||||||
if (href) {
|
if (href) {
|
||||||
// Replace links with javascript: URIs with text content, since
|
// Remove links with javascript: URIs, since
|
||||||
// they won't work after scripts have been removed from the page.
|
// they won't work after scripts have been removed from the page.
|
||||||
if (href.indexOf("javascript:") === 0) {
|
if (href.indexOf("javascript:") === 0) {
|
||||||
var text = this._doc.createTextNode(link.textContent);
|
// if the link only contains simple text content, it can be converted to a text node
|
||||||
link.parentNode.replaceChild(text, link);
|
if (link.childNodes.length === 1 && link.childNodes[0].nodeType === this.TEXT_NODE) {
|
||||||
|
var text = this._doc.createTextNode(link.textContent);
|
||||||
|
link.parentNode.replaceChild(text, link);
|
||||||
|
} else {
|
||||||
|
// if the link has multiple children, they should all be preserved
|
||||||
|
var container = this._doc.createElement("span");
|
||||||
|
while (link.childNodes.length > 0) {
|
||||||
|
container.appendChild(link.childNodes[0]);
|
||||||
|
}
|
||||||
|
link.parentNode.replaceChild(container, link);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
link.setAttribute("href", toAbsoluteURI(href));
|
link.setAttribute("href", toAbsoluteURI(href));
|
||||||
}
|
}
|
||||||
@ -441,13 +460,13 @@ Readability.prototype = {
|
|||||||
var doc = this._doc;
|
var doc = this._doc;
|
||||||
|
|
||||||
// Remove all style tags in head
|
// Remove all style tags in head
|
||||||
this._removeNodes(doc.getElementsByTagName("style"));
|
this._removeNodes(this._getAllNodesWithTag(doc, ["style"]));
|
||||||
|
|
||||||
if (doc.body) {
|
if (doc.body) {
|
||||||
this._replaceBrs(doc.body);
|
this._replaceBrs(doc.body);
|
||||||
}
|
}
|
||||||
|
|
||||||
this._replaceNodeTags(doc.getElementsByTagName("font"), "SPAN");
|
this._replaceNodeTags(this._getAllNodesWithTag(doc, ["font"]), "SPAN");
|
||||||
},
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -527,7 +546,7 @@ Readability.prototype = {
|
|||||||
|
|
||||||
_setNodeTag: function (node, tag) {
|
_setNodeTag: function (node, tag) {
|
||||||
this.log("_setNodeTag", node, tag);
|
this.log("_setNodeTag", node, tag);
|
||||||
if (node.__JSDOMParser__) {
|
if (this._docJSDOMParser) {
|
||||||
node.localName = tag.toLowerCase();
|
node.localName = tag.toLowerCase();
|
||||||
node.tagName = tag.toUpperCase();
|
node.tagName = tag.toUpperCase();
|
||||||
return node;
|
return node;
|
||||||
@ -627,7 +646,7 @@ Readability.prototype = {
|
|||||||
this._cleanConditionally(articleContent, "div");
|
this._cleanConditionally(articleContent, "div");
|
||||||
|
|
||||||
// Remove extra paragraphs
|
// Remove extra paragraphs
|
||||||
this._removeNodes(articleContent.getElementsByTagName("p"), function (paragraph) {
|
this._removeNodes(this._getAllNodesWithTag(articleContent, ["p"]), function (paragraph) {
|
||||||
var imgCount = paragraph.getElementsByTagName("img").length;
|
var imgCount = paragraph.getElementsByTagName("img").length;
|
||||||
var embedCount = paragraph.getElementsByTagName("embed").length;
|
var embedCount = paragraph.getElementsByTagName("embed").length;
|
||||||
var objectCount = paragraph.getElementsByTagName("object").length;
|
var objectCount = paragraph.getElementsByTagName("object").length;
|
||||||
@ -1303,12 +1322,12 @@ Readability.prototype = {
|
|||||||
* @param Element
|
* @param Element
|
||||||
**/
|
**/
|
||||||
_removeScripts: function(doc) {
|
_removeScripts: function(doc) {
|
||||||
this._removeNodes(doc.getElementsByTagName("script"), function(scriptNode) {
|
this._removeNodes(this._getAllNodesWithTag(doc, ["script"]), function(scriptNode) {
|
||||||
scriptNode.nodeValue = "";
|
scriptNode.nodeValue = "";
|
||||||
scriptNode.removeAttribute("src");
|
scriptNode.removeAttribute("src");
|
||||||
return true;
|
return true;
|
||||||
});
|
});
|
||||||
this._removeNodes(doc.getElementsByTagName("noscript"));
|
this._removeNodes(this._getAllNodesWithTag(doc, ["noscript"]));
|
||||||
},
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1491,7 +1510,7 @@ Readability.prototype = {
|
|||||||
_clean: function(e, tag) {
|
_clean: function(e, tag) {
|
||||||
var isEmbed = ["object", "embed", "iframe"].indexOf(tag) !== -1;
|
var isEmbed = ["object", "embed", "iframe"].indexOf(tag) !== -1;
|
||||||
|
|
||||||
this._removeNodes(e.getElementsByTagName(tag), function(element) {
|
this._removeNodes(this._getAllNodesWithTag(e, [tag]), function(element) {
|
||||||
// Allow youtube and vimeo videos through as people usually want to see those.
|
// Allow youtube and vimeo videos through as people usually want to see those.
|
||||||
if (isEmbed) {
|
if (isEmbed) {
|
||||||
// First, check the elements attributes to see if any of them contain youtube or vimeo
|
// First, check the elements attributes to see if any of them contain youtube or vimeo
|
||||||
@ -1672,7 +1691,7 @@ Readability.prototype = {
|
|||||||
// without effecting the traversal.
|
// without effecting the traversal.
|
||||||
//
|
//
|
||||||
// TODO: Consider taking into account original contentScore here.
|
// TODO: Consider taking into account original contentScore here.
|
||||||
this._removeNodes(e.getElementsByTagName(tag), function(node) {
|
this._removeNodes(this._getAllNodesWithTag(e, [tag]), function(node) {
|
||||||
// First check if this node IS data table, in which case don't remove it.
|
// First check if this node IS data table, in which case don't remove it.
|
||||||
var isDataTable = function(t) {
|
var isDataTable = function(t) {
|
||||||
return t._readabilityDataTable;
|
return t._readabilityDataTable;
|
||||||
@ -1706,10 +1725,7 @@ Readability.prototype = {
|
|||||||
var input = node.getElementsByTagName("input").length;
|
var input = node.getElementsByTagName("input").length;
|
||||||
|
|
||||||
var embedCount = 0;
|
var embedCount = 0;
|
||||||
var embeds = this._concatNodeLists(
|
var embeds = this._getAllNodesWithTag(node, ["object", "embed", "iframe"]);
|
||||||
node.getElementsByTagName("object"),
|
|
||||||
node.getElementsByTagName("embed"),
|
|
||||||
node.getElementsByTagName("iframe"));
|
|
||||||
|
|
||||||
for (var i = 0; i < embeds.length; i++) {
|
for (var i = 0; i < embeds.length; i++) {
|
||||||
// If this embed has attribute that matches video regex, don't delete it.
|
// If this embed has attribute that matches video regex, don't delete it.
|
||||||
@ -1770,11 +1786,9 @@ Readability.prototype = {
|
|||||||
* @return void
|
* @return void
|
||||||
**/
|
**/
|
||||||
_cleanHeaders: function(e) {
|
_cleanHeaders: function(e) {
|
||||||
for (var headerIndex = 1; headerIndex < 3; headerIndex += 1) {
|
this._removeNodes(this._getAllNodesWithTag(e, ["h1", "h2"]), function (header) {
|
||||||
this._removeNodes(e.getElementsByTagName("h" + headerIndex), function (header) {
|
return this._getClassWeight(header) < 0;
|
||||||
return this._getClassWeight(header) < 0;
|
});
|
||||||
});
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
|
|
||||||
_flagIsActive: function(flag) {
|
_flagIsActive: function(flag) {
|
||||||
@ -1786,9 +1800,11 @@ Readability.prototype = {
|
|||||||
},
|
},
|
||||||
|
|
||||||
_isProbablyVisible: function(node) {
|
_isProbablyVisible: function(node) {
|
||||||
|
// Have to null-check node.style and node.className.indexOf to deal with SVG and MathML nodes.
|
||||||
return (!node.style || node.style.display != "none")
|
return (!node.style || node.style.display != "none")
|
||||||
&& !node.hasAttribute("hidden")
|
&& !node.hasAttribute("hidden")
|
||||||
&& (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true");
|
//check for "fallback-image" so that wikimedia math images are displayed
|
||||||
|
&& (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || (node.className && node.className.indexOf && node.className.indexOf("fallback-image") !== -1));
|
||||||
},
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
x
Reference in New Issue
Block a user