diff --git a/toolkit/components/reader/JSDOMParser.js b/toolkit/components/reader/JSDOMParser.js index e19172fc6..30c7d4cd9 100644 --- a/toolkit/components/reader/JSDOMParser.js +++ b/toolkit/components/reader/JSDOMParser.js @@ -308,6 +308,7 @@ } } getElems(this); + elems._isLiveNodeList = true; return elems; } diff --git a/toolkit/components/reader/Readability-readerable.js b/toolkit/components/reader/Readability-readerable.js index a813f5fb2..650f7f35a 100644 --- a/toolkit/components/reader/Readability-readerable.js +++ b/toolkit/components/reader/Readability-readerable.js @@ -29,9 +29,11 @@ var REGEXPS = { }; function isNodeVisible(node) { - // Have to null-check node.style to deal with SVG and MathML nodes. - return (!node.style || node.style.display != "none") && !node.hasAttribute("hidden") - && (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true"); + // Have to null-check node.style and node.className.indexOf to deal with SVG and MathML nodes. + return (!node.style || node.style.display != "none") + && !node.hasAttribute("hidden") + //check for "fallback-image" so that wikimedia math images are displayed + && (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || (node.className && node.className.indexOf && node.className.indexOf("fallback-image") !== -1)); } /** diff --git a/toolkit/components/reader/Readability.js b/toolkit/components/reader/Readability.js index b6abe20da..30938791f 100644 --- a/toolkit/components/reader/Readability.js +++ b/toolkit/components/reader/Readability.js @@ -36,6 +36,7 @@ function Readability(doc, options) { options = options || {}; this._doc = doc; + this._docJSDOMParser = this._doc.firstChild.__JSDOMParser__; this._articleTitle = null; this._articleByline = null; this._articleDir = null; @@ -181,6 +182,10 @@ Readability.prototype = { * @return void */ _removeNodes: function(nodeList, filterFn) { + // Avoid ever operating on live node lists. + if (this._docJSDOMParser && nodeList._isLiveNodeList) { + throw new Error("Do not pass live node lists to _removeNodes"); + } for (var i = nodeList.length - 1; i >= 0; i--) { var node = nodeList[i]; var parentNode = node.parentNode; @@ -200,6 +205,10 @@ Readability.prototype = { * @return void */ _replaceNodeTags: function(nodeList, newTagName) { + // Avoid ever operating on live node lists. + if (this._docJSDOMParser && nodeList._isLiveNodeList) { + throw new Error("Do not pass live node lists to _replaceNodeTags"); + } for (var i = nodeList.length - 1; i >= 0; i--) { var node = nodeList[i]; this._setNodeTag(node, newTagName); @@ -332,11 +341,21 @@ Readability.prototype = { this._forEachNode(links, function(link) { var href = link.getAttribute("href"); if (href) { - // Replace links with javascript: URIs with text content, since + // Remove links with javascript: URIs, since // they won't work after scripts have been removed from the page. if (href.indexOf("javascript:") === 0) { - var text = this._doc.createTextNode(link.textContent); - link.parentNode.replaceChild(text, link); + // if the link only contains simple text content, it can be converted to a text node + if (link.childNodes.length === 1 && link.childNodes[0].nodeType === this.TEXT_NODE) { + var text = this._doc.createTextNode(link.textContent); + link.parentNode.replaceChild(text, link); + } else { + // if the link has multiple children, they should all be preserved + var container = this._doc.createElement("span"); + while (link.childNodes.length > 0) { + container.appendChild(link.childNodes[0]); + } + link.parentNode.replaceChild(container, link); + } } else { link.setAttribute("href", toAbsoluteURI(href)); } @@ -441,13 +460,13 @@ Readability.prototype = { var doc = this._doc; // Remove all style tags in head - this._removeNodes(doc.getElementsByTagName("style")); + this._removeNodes(this._getAllNodesWithTag(doc, ["style"])); if (doc.body) { this._replaceBrs(doc.body); } - this._replaceNodeTags(doc.getElementsByTagName("font"), "SPAN"); + this._replaceNodeTags(this._getAllNodesWithTag(doc, ["font"]), "SPAN"); }, /** @@ -527,7 +546,7 @@ Readability.prototype = { _setNodeTag: function (node, tag) { this.log("_setNodeTag", node, tag); - if (node.__JSDOMParser__) { + if (this._docJSDOMParser) { node.localName = tag.toLowerCase(); node.tagName = tag.toUpperCase(); return node; @@ -627,7 +646,7 @@ Readability.prototype = { this._cleanConditionally(articleContent, "div"); // Remove extra paragraphs - this._removeNodes(articleContent.getElementsByTagName("p"), function (paragraph) { + this._removeNodes(this._getAllNodesWithTag(articleContent, ["p"]), function (paragraph) { var imgCount = paragraph.getElementsByTagName("img").length; var embedCount = paragraph.getElementsByTagName("embed").length; var objectCount = paragraph.getElementsByTagName("object").length; @@ -1303,12 +1322,12 @@ Readability.prototype = { * @param Element **/ _removeScripts: function(doc) { - this._removeNodes(doc.getElementsByTagName("script"), function(scriptNode) { + this._removeNodes(this._getAllNodesWithTag(doc, ["script"]), function(scriptNode) { scriptNode.nodeValue = ""; scriptNode.removeAttribute("src"); return true; }); - this._removeNodes(doc.getElementsByTagName("noscript")); + this._removeNodes(this._getAllNodesWithTag(doc, ["noscript"])); }, /** @@ -1491,7 +1510,7 @@ Readability.prototype = { _clean: function(e, tag) { var isEmbed = ["object", "embed", "iframe"].indexOf(tag) !== -1; - this._removeNodes(e.getElementsByTagName(tag), function(element) { + this._removeNodes(this._getAllNodesWithTag(e, [tag]), function(element) { // Allow youtube and vimeo videos through as people usually want to see those. if (isEmbed) { // First, check the elements attributes to see if any of them contain youtube or vimeo @@ -1672,7 +1691,7 @@ Readability.prototype = { // without effecting the traversal. // // TODO: Consider taking into account original contentScore here. - this._removeNodes(e.getElementsByTagName(tag), function(node) { + this._removeNodes(this._getAllNodesWithTag(e, [tag]), function(node) { // First check if this node IS data table, in which case don't remove it. var isDataTable = function(t) { return t._readabilityDataTable; @@ -1706,10 +1725,7 @@ Readability.prototype = { var input = node.getElementsByTagName("input").length; var embedCount = 0; - var embeds = this._concatNodeLists( - node.getElementsByTagName("object"), - node.getElementsByTagName("embed"), - node.getElementsByTagName("iframe")); + var embeds = this._getAllNodesWithTag(node, ["object", "embed", "iframe"]); for (var i = 0; i < embeds.length; i++) { // If this embed has attribute that matches video regex, don't delete it. @@ -1770,11 +1786,9 @@ Readability.prototype = { * @return void **/ _cleanHeaders: function(e) { - for (var headerIndex = 1; headerIndex < 3; headerIndex += 1) { - this._removeNodes(e.getElementsByTagName("h" + headerIndex), function (header) { - return this._getClassWeight(header) < 0; - }); - } + this._removeNodes(this._getAllNodesWithTag(e, ["h1", "h2"]), function (header) { + return this._getClassWeight(header) < 0; + }); }, _flagIsActive: function(flag) { @@ -1786,9 +1800,11 @@ Readability.prototype = { }, _isProbablyVisible: function(node) { + // Have to null-check node.style and node.className.indexOf to deal with SVG and MathML nodes. return (!node.style || node.style.display != "none") && !node.hasAttribute("hidden") - && (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true"); + //check for "fallback-image" so that wikimedia math images are displayed + && (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || (node.className && node.className.indexOf && node.className.indexOf("fallback-image") !== -1)); }, /**