diff --git a/.eslintignore b/.eslintignore index 4d1468b70833..d70943eafaff 100644 --- a/.eslintignore +++ b/.eslintignore @@ -370,6 +370,7 @@ toolkit/components/reader/Readability.js toolkit/components/reader/JSDOMParser.js # Uses preprocessing +toolkit/components/reader/Readerable.jsm toolkit/content/widgets/wizard.xml toolkit/modules/AppConstants.jsm toolkit/mozapps/update/tests/data/xpcshellConstantsPP.js diff --git a/browser/actors/AboutReaderChild.jsm b/browser/actors/AboutReaderChild.jsm index 35300c8b7700..0981e7bcf60f 100644 --- a/browser/actors/AboutReaderChild.jsm +++ b/browser/actors/AboutReaderChild.jsm @@ -12,6 +12,8 @@ ChromeUtils.defineModuleGetter(this, "AboutReader", "resource://gre/modules/AboutReader.jsm"); ChromeUtils.defineModuleGetter(this, "ReaderMode", "resource://gre/modules/ReaderMode.jsm"); +ChromeUtils.defineModuleGetter(this, "Readerable", + "resource://gre/modules/Readerable.jsm"); class AboutReaderChild extends ActorChild { constructor(mm) { @@ -102,7 +104,7 @@ class AboutReaderChild extends ActorChild { * painted is not going to work. */ updateReaderButton(forceNonArticle) { - if (!ReaderMode.isEnabledForParseOnLoad || this.isAboutReader || + if (!Readerable.isEnabledForParseOnLoad || this.isAboutReader || !this.content || !(this.content.document instanceof this.content.HTMLDocument) || this.content.document.mozSyntheticDocument) { return; @@ -141,7 +143,7 @@ class AboutReaderChild extends ActorChild { this.cancelPotentialPendingReadabilityCheck(); // Only send updates when there are articles; there's no point updating with // |false| all the time. - if (ReaderMode.isProbablyReaderable(this.content.document)) { + if (Readerable.isProbablyReaderable(this.content.document)) { this.mm.sendAsyncMessage("Reader:UpdateReaderButton", { isArticle: true }); } else if (forceNonArticle) { this.mm.sendAsyncMessage("Reader:UpdateReaderButton", { isArticle: false }); diff --git a/browser/base/content/test/performance/browser_startup_content.js b/browser/base/content/test/performance/browser_startup_content.js index 76c9aaac185e..ed35685b88ce 100644 --- a/browser/base/content/test/performance/browser_startup_content.js +++ b/browser/base/content/test/performance/browser_startup_content.js @@ -56,7 +56,7 @@ const whitelist = { "resource://gre/modules/ActorChild.jsm", "resource://gre/modules/ActorManagerChild.jsm", "resource://gre/modules/E10SUtils.jsm", - "resource://gre/modules/ReaderMode.jsm", + "resource://gre/modules/Readerable.jsm", "resource://gre/modules/WebProgressChild.jsm", // Pocket diff --git a/mobile/android/chrome/content/content.js b/mobile/android/chrome/content/content.js index 31108508aad8..7d4f66ef1b79 100644 --- a/mobile/android/chrome/content/content.js +++ b/mobile/android/chrome/content/content.js @@ -8,6 +8,7 @@ ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm"); ChromeUtils.defineModuleGetter(this, "AboutReader", "resource://gre/modules/AboutReader.jsm"); ChromeUtils.defineModuleGetter(this, "ReaderMode", "resource://gre/modules/ReaderMode.jsm"); +ChromeUtils.defineModuleGetter(this, "Readerable", "resource://gre/modules/Readerable.jsm"); ChromeUtils.defineModuleGetter(this, "LoginManagerContent", "resource://gre/modules/LoginManagerContent.jsm"); XPCOMUtils.defineLazyGetter(this, "gPipNSSBundle", function() { @@ -486,7 +487,7 @@ var AboutReaderListener = { // Do not show Reader View icon on error pages (bug 1320900) if (this.isErrorPage) { sendAsyncMessage("Reader:UpdateReaderButton", { isArticle: false }); - } else if (!ReaderMode.isEnabledForParseOnLoad || this.isAboutReader || + } else if (!Readerable.isEnabledForParseOnLoad || this.isAboutReader || !(content.document instanceof content.HTMLDocument) || content.document.mozSyntheticDocument) { @@ -522,11 +523,12 @@ var AboutReaderListener = { return; } + Services.console.logStringMessage(`ON PAINT WHEN WAITED FOR\n`); this.cancelPotentialPendingReadabilityCheck(); // Only send updates when there are articles; there's no point updating with // |false| all the time. - if (ReaderMode.isProbablyReaderable(content.document)) { + if (Readerable.isProbablyReaderable(content.document)) { sendAsyncMessage("Reader:UpdateReaderButton", { isArticle: true }); } else if (forceNonArticle) { sendAsyncMessage("Reader:UpdateReaderButton", { isArticle: false }); diff --git a/toolkit/components/reader/Readability-readerable.js b/toolkit/components/reader/Readability-readerable.js new file mode 100644 index 000000000000..f38b418a061b --- /dev/null +++ b/toolkit/components/reader/Readability-readerable.js @@ -0,0 +1,96 @@ +/* eslint-env es6:false */ +/* globals exports */ +/* + * Copyright (c) 2010 Arc90 Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This code is heavily based on Arc90's readability.js (1.7.1) script + * available at: http://code.google.com/p/arc90labs-readability + */ + +var REGEXPS = { + // NOTE: These two regular expressions are duplicated in + // Readability.js. Please keep both copies in sync. + unlikelyCandidates: /-ad-|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i, + okMaybeItsACandidate: /and|article|body|column|main|shadow/i, +}; + +function isNodeVisible(node) { + return node.style.display != "none" && !node.hasAttribute("hidden"); +} + +/** + * Decides whether or not the document is reader-able without parsing the whole thing. + * + * @return boolean Whether or not we suspect Readability.parse() will suceeed at returning an article object. + */ +function isProbablyReaderable(doc, isVisible) { + if (!isVisible) { + isVisible = isNodeVisible; + } + + var nodes = doc.querySelectorAll("p, pre"); + + // Get
nodes which have
node(s) and append them into the `nodes` variable. + // Some articles' DOM structures might look like + //
+ // Sentences
+ //
+ // Sentences
+ //
+ var brNodes = doc.querySelectorAll("div > br"); + if (brNodes.length) { + var set = new Set(nodes); + [].forEach.call(brNodes, function(node) { + set.add(node.parentNode); + }); + nodes = Array.from(set); + } + + var score = 0; + // This is a little cheeky, we use the accumulator 'score' to decide what to return from + // this callback: + return [].some.call(nodes, function(node) { + if (!isVisible(node)) + return false; + + var matchString = node.className + " " + node.id; + if (REGEXPS.unlikelyCandidates.test(matchString) && + !REGEXPS.okMaybeItsACandidate.test(matchString)) { + return false; + } + + if (node.matches("li p")) { + return false; + } + + var textContentLength = node.textContent.trim().length; + if (textContentLength < 140) { + return false; + } + + score += Math.sqrt(textContentLength - 140); + + if (score > 20) { + return true; + } + return false; + }); +} + +if (typeof exports === "object") { + exports.isProbablyReaderable = isProbablyReaderable; +} diff --git a/toolkit/components/reader/Readability.js b/toolkit/components/reader/Readability.js index da89277e3718..deac15904d55 100644 --- a/toolkit/components/reader/Readability.js +++ b/toolkit/components/reader/Readability.js @@ -1,11 +1,4 @@ /*eslint-env es6:false*/ -/* - * DO NOT MODIFY THIS FILE DIRECTLY! - * - * This is a shared library that is maintained in an external repo: - * https://github.com/mozilla/readability - */ - /* * Copyright (c) 2010 Arc90 Inc * @@ -118,8 +111,11 @@ Readability.prototype = { // All of the regular expressions in use within readability. // Defined up here so we don't instantiate them repeatedly in loops. REGEXPS: { + // NOTE: These two regular expressions are duplicated in + // Readability-readerable.js. Please keep both copies in sync. unlikelyCandidates: /-ad-|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i, okMaybeItsACandidate: /and|article|body|column|main|shadow/i, + positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i, negative: /hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i, extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i, @@ -1711,65 +1707,6 @@ Readability.prototype = { return node.style.display != "none" && !node.hasAttribute("hidden"); }, - /** - * Decides whether or not the document is reader-able without parsing the whole thing. - * - * @return boolean Whether or not we suspect parse() will suceeed at returning an article object. - */ - isProbablyReaderable: function(helperIsVisible) { - var nodes = this._getAllNodesWithTag(this._doc, ["p", "pre"]); - - // Get
nodes which have
node(s) and append them into the `nodes` variable. - // Some articles' DOM structures might look like - //
- // Sentences
- //
- // Sentences
- //
- var brNodes = this._getAllNodesWithTag(this._doc, ["div > br"]); - if (brNodes.length) { - var set = new Set(); - [].forEach.call(brNodes, function(node) { - set.add(node.parentNode); - }); - nodes = [].concat.apply(Array.from(set), nodes); - } - - if (!helperIsVisible) { - helperIsVisible = this._isProbablyVisible; - } - - var score = 0; - // This is a little cheeky, we use the accumulator 'score' to decide what to return from - // this callback: - return this._someNode(nodes, function(node) { - if (helperIsVisible && !helperIsVisible(node)) - return false; - var matchString = node.className + " " + node.id; - - if (this.REGEXPS.unlikelyCandidates.test(matchString) && - !this.REGEXPS.okMaybeItsACandidate.test(matchString)) { - return false; - } - - if (node.matches && node.matches("li p")) { - return false; - } - - var textContentLength = node.textContent.trim().length; - if (textContentLength < 140) { - return false; - } - - score += Math.sqrt(textContentLength - 140); - - if (score > 20) { - return true; - } - return false; - }); - }, - /** * Runs readability. * diff --git a/toolkit/components/reader/ReaderMode.jsm b/toolkit/components/reader/ReaderMode.jsm index f147d5a95520..134ca7a6b1af 100644 --- a/toolkit/components/reader/ReaderMode.jsm +++ b/toolkit/components/reader/ReaderMode.jsm @@ -41,13 +41,7 @@ ChromeUtils.defineModuleGetter(this, "EventDispatcher", "resource://gre/modules/ ChromeUtils.defineModuleGetter(this, "OS", "resource://gre/modules/osfile.jsm"); ChromeUtils.defineModuleGetter(this, "ReaderWorker", "resource://gre/modules/reader/ReaderWorker.jsm"); ChromeUtils.defineModuleGetter(this, "LanguageDetector", "resource:///modules/translation/LanguageDetector.jsm"); - -XPCOMUtils.defineLazyGetter(this, "Readability", function() { - let scope = {}; - scope.dump = this.dump; - Services.scriptloader.loadSubScript("resource://gre/modules/reader/Readability.js", scope); - return scope.Readability; -}); +ChromeUtils.defineModuleGetter(this, "Readerable", "resource://gre/modules/Readerable.jsm"); const gIsFirefoxDesktop = Services.appinfo.ID == "{ec8030f7-c20a-464f-9b0e-13a3a9e97384}"; @@ -57,42 +51,6 @@ var ReaderMode = { DEBUG: 0, - // Don't try to parse the page if it has too many elements (for memory and - // performance reasons) - get maxElemsToParse() { - delete this.parseNodeLimit; - - Services.prefs.addObserver("reader.parse-node-limit", this); - return this.parseNodeLimit = Services.prefs.getIntPref("reader.parse-node-limit"); - }, - - get isEnabledForParseOnLoad() { - delete this.isEnabledForParseOnLoad; - - // Listen for future pref changes. - Services.prefs.addObserver("reader.parse-on-load.", this); - - return this.isEnabledForParseOnLoad = this._getStateForParseOnLoad(); - }, - - _getStateForParseOnLoad() { - let isEnabled = Services.prefs.getBoolPref("reader.parse-on-load.enabled"); - let isForceEnabled = Services.prefs.getBoolPref("reader.parse-on-load.force-enabled"); - return isForceEnabled || isEnabled; - }, - - observe(aMessage, aTopic, aData) { - switch (aTopic) { - case "nsPref:changed": - if (aData.startsWith("reader.parse-on-load.")) { - this.isEnabledForParseOnLoad = this._getStateForParseOnLoad(); - } else if (aData === "reader.parse-node-limit") { - this.parseNodeLimit = Services.prefs.getIntPref(aData); - } - break; - } - }, - /** * Enter the reader mode by going forward one step in history if applicable, * if not, append the about:reader page in the history instead. @@ -197,39 +155,6 @@ var ReaderMode = { return null; }, - /** - * Decides whether or not a document is reader-able without parsing the whole thing. - * - * @param doc A document to parse. - * @return boolean Whether or not we should show the reader mode button. - */ - isProbablyReaderable(doc) { - // Only care about 'real' HTML documents: - if (doc.mozSyntheticDocument || !(doc instanceof doc.defaultView.HTMLDocument)) { - return false; - } - - let uri = Services.io.newURI(doc.location.href); - if (!this._shouldCheckUri(uri)) { - return false; - } - - let utils = this.getUtilsForWin(doc.defaultView); - // We pass in a helper function to determine if a node is visible, because - // it uses gecko APIs that the engine-agnostic readability code can't rely - // upon. - return new Readability(doc).isProbablyReaderable(this.isNodeVisible.bind(this, utils)); - }, - - isNodeVisible(utils, node) { - let bounds = utils.getBoundsWithoutFlushing(node); - return bounds.height > 0 && bounds.width > 0; - }, - - getUtilsForWin(win) { - return win.windowUtils; - }, - /** * Gets an article from a loaded browser's document. This method will not attempt * to parse certain URIs (e.g. about: URIs). @@ -239,7 +164,8 @@ var ReaderMode = { * @resolves JS object representing the article, or null if no article is found. */ parseDocument(doc) { - if (!this._shouldCheckUri(doc.documentURIObject) || !this._shouldCheckUri(doc.baseURIObject, true)) { + if (!Readerable.shouldCheckUri(doc.documentURIObject) || + !Readerable.shouldCheckUri(doc.baseURIObject, true)) { this.log("Reader mode disabled for URI"); return null; } @@ -259,7 +185,8 @@ var ReaderMode = { if (!doc) { return null; } - if (!this._shouldCheckUri(doc.documentURIObject) || !this._shouldCheckUri(doc.baseURIObject, true)) { + if (!Readerable.shouldCheckUri(doc.documentURIObject) || + !Readerable.shouldCheckUri(doc.baseURIObject, true)) { this.log("Reader mode disabled for URI"); return null; } @@ -269,7 +196,7 @@ var ReaderMode = { _downloadDocument(url) { try { - if (!this._shouldCheckUri(Services.io.newURI(url))) { + if (!Readerable.shouldCheckUri(Services.io.newURI(url))) { return null; } } catch (ex) { @@ -415,42 +342,6 @@ var ReaderMode = { dump("Reader: " + msg); }, - _blockedHosts: [ - "amazon.com", - "github.com", - "mail.google.com", - "pinterest.com", - "reddit.com", - "twitter.com", - "youtube.com", - ], - - _shouldCheckUri(uri, isBaseUri = false) { - if (!(uri.schemeIs("http") || uri.schemeIs("https"))) { - this.log("Not parsing URI scheme: " + uri.scheme); - return false; - } - - try { - uri.QueryInterface(Ci.nsIURL); - } catch (ex) { - // If this doesn't work, presumably the URL is not well-formed or something - return false; - } - // Sadly, some high-profile pages have false positives, so bail early for those: - let asciiHost = uri.asciiHost; - if (!isBaseUri && this._blockedHosts.some(blockedHost => asciiHost.endsWith(blockedHost))) { - return false; - } - - if (!isBaseUri && (!uri.filePath || uri.filePath == "/")) { - this.log("Not parsing home page: " + uri.spec); - return false; - } - - return true; - }, - /** * Attempts to parse a document into an article. Heavy lifting happens * in readerWorker.js. @@ -641,3 +532,6 @@ var ReaderMode = { return readingSpeed.get(lang) || readingSpeed.get("en"); }, }; + +XPCOMUtils.defineLazyPreferenceGetter( + ReaderMode, "maxElemsToParse", "reader.parse-node-limit", 0); diff --git a/toolkit/components/reader/Readerable.js b/toolkit/components/reader/Readerable.js new file mode 100644 index 000000000000..509e53d3f761 --- /dev/null +++ b/toolkit/components/reader/Readerable.js @@ -0,0 +1,79 @@ +// -*- indent-tabs-mode: nil; js-indent-level: 2 -*- +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ +"use strict"; + +// This file and Readability-readerable.js are merged together into +// Readerable.jsm. + +/* exported Readerable */ +/* import-globals-from Readability-readerable.js */ + +ChromeUtils.import("resource://gre/modules/Services.jsm"); +ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm"); + +function isNodeVisible(node) { + return node.clientHeight > 0 && node.clientWidth > 0; +} + +var Readerable = { + get isEnabledForParseOnLoad() { + return this.isEnabled || this.isForceEnabled; + }, + + /** + * Decides whether or not a document is reader-able without parsing the whole thing. + * + * @param doc A document to parse. + * @return boolean Whether or not we should show the reader mode button. + */ + isProbablyReaderable(doc) { + // Only care about 'real' HTML documents: + if (doc.mozSyntheticDocument || !(doc instanceof doc.defaultView.HTMLDocument)) { + return false; + } + + let uri = Services.io.newURI(doc.location.href); + if (!this.shouldCheckUri(uri)) { + return false; + } + + return isProbablyReaderable(doc, isNodeVisible); + }, + + _blockedHosts: [ + "amazon.com", + "github.com", + "mail.google.com", + "pinterest.com", + "reddit.com", + "twitter.com", + "youtube.com", + ], + + shouldCheckUri(uri, isBaseUri = false) { + if (!["http", "https"].includes(uri.scheme)) { + return false; + } + + if (!isBaseUri) { + // Sadly, some high-profile pages have false positives, so bail early for those: + let {host} = uri; + if (this._blockedHosts.some(blockedHost => host.endsWith(blockedHost))) { + return false; + } + + if (uri.filePath == "/") { + return false; + } + } + + return true; + }, +}; + +XPCOMUtils.defineLazyPreferenceGetter( + Readerable, "isEnabled", "reader.parse-on-load.enabled", true); +XPCOMUtils.defineLazyPreferenceGetter( + Readerable, "isForceEnabled", "reader.parse-on-load.force-enabled", false); diff --git a/toolkit/components/reader/Readerable.jsm b/toolkit/components/reader/Readerable.jsm new file mode 100644 index 000000000000..2268487e4296 --- /dev/null +++ b/toolkit/components/reader/Readerable.jsm @@ -0,0 +1,10 @@ +// -*- indent-tabs-mode: nil; js-indent-level: 2 -*- +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ +"use strict"; + +var EXPORTED_SYMBOLS = ["Readerable"]; + +#include Readability-readerable.js +#include Readerable.js diff --git a/toolkit/components/reader/moz.build b/toolkit/components/reader/moz.build index 6863d6542747..c2364ce8deb4 100644 --- a/toolkit/components/reader/moz.build +++ b/toolkit/components/reader/moz.build @@ -8,14 +8,18 @@ JAR_MANIFESTS += ['jar.mn'] EXTRA_JS_MODULES += [ 'AboutReader.jsm', - 'ReaderMode.jsm' + 'ReaderMode.jsm', +] + +EXTRA_PP_JS_MODULES += [ + 'Readerable.jsm', ] EXTRA_JS_MODULES.reader = [ 'JSDOMParser.js', 'Readability.js', 'ReaderWorker.js', - 'ReaderWorker.jsm' + 'ReaderWorker.jsm', ] BROWSER_CHROME_MANIFESTS += [