node(s) and append them into the `nodes` variable.
+ // Some articles' DOM structures might look like
+ // <div>
+ //   Sentences<br>
+ //   <br>
+ //   Sentences<br>
+ // </div>
+ var brNodes = doc.querySelectorAll("div > br");
+ if (brNodes.length) {
+ var set = new Set(nodes);
+ [].forEach.call(brNodes, function(node) {
+ set.add(node.parentNode);
+ });
+ nodes = Array.from(set);
+ }
+
+ var score = 0;
+ // This is a little cheeky, we use the accumulator 'score' to decide what to return from
+ // this callback:
+ return [].some.call(nodes, function(node) {
+ if (!isVisible(node))
+ return false;
+
+ var matchString = node.className + " " + node.id;
+ if (REGEXPS.unlikelyCandidates.test(matchString) &&
+ !REGEXPS.okMaybeItsACandidate.test(matchString)) {
+ return false;
+ }
+
+ if (node.matches("li p")) {
+ return false;
+ }
+
+ var textContentLength = node.textContent.trim().length;
+ if (textContentLength < 140) {
+ return false;
+ }
+
+ score += Math.sqrt(textContentLength - 140);
+
+ if (score > 20) {
+ return true;
+ }
+ return false;
+ });
+}
+
+if (typeof exports === "object") {
+ exports.isProbablyReaderable = isProbablyReaderable;
+}
diff --git a/toolkit/components/reader/Readability.js b/toolkit/components/reader/Readability.js
index da89277e3718..deac15904d55 100644
--- a/toolkit/components/reader/Readability.js
+++ b/toolkit/components/reader/Readability.js
@@ -1,11 +1,4 @@
/*eslint-env es6:false*/
-/*
- * DO NOT MODIFY THIS FILE DIRECTLY!
- *
- * This is a shared library that is maintained in an external repo:
- * https://github.com/mozilla/readability
- */
-
/*
* Copyright (c) 2010 Arc90 Inc
*
@@ -118,8 +111,11 @@ Readability.prototype = {
// All of the regular expressions in use within readability.
// Defined up here so we don't instantiate them repeatedly in loops.
REGEXPS: {
+ // NOTE: These two regular expressions are duplicated in
+ // Readability-readerable.js. Please keep both copies in sync.
unlikelyCandidates: /-ad-|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,
okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
+
positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,
negative: /hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,
extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,
@@ -1711,65 +1707,6 @@ Readability.prototype = {
return node.style.display != "none" && !node.hasAttribute("hidden");
},
- /**
- * Decides whether or not the document is reader-able without parsing the whole thing.
- *
- * @return boolean Whether or not we suspect parse() will suceeed at returning an article object.
- */
- isProbablyReaderable: function(helperIsVisible) {
- var nodes = this._getAllNodesWithTag(this._doc, ["p", "pre"]);
-
- // Get <div> nodes which have <br> node(s) and append them into the `nodes` variable.
- // Some articles' DOM structures might look like
- // <div>
- //   Sentences<br>
- //   <br>
- //   Sentences<br>
- // </div>
- var brNodes = this._getAllNodesWithTag(this._doc, ["div > br"]);
- if (brNodes.length) {
- var set = new Set();
- [].forEach.call(brNodes, function(node) {
- set.add(node.parentNode);
- });
- nodes = [].concat.apply(Array.from(set), nodes);
- }
-
- if (!helperIsVisible) {
- helperIsVisible = this._isProbablyVisible;
- }
-
- var score = 0;
- // This is a little cheeky, we use the accumulator 'score' to decide what to return from
- // this callback:
- return this._someNode(nodes, function(node) {
- if (helperIsVisible && !helperIsVisible(node))
- return false;
- var matchString = node.className + " " + node.id;
-
- if (this.REGEXPS.unlikelyCandidates.test(matchString) &&
- !this.REGEXPS.okMaybeItsACandidate.test(matchString)) {
- return false;
- }
-
- if (node.matches && node.matches("li p")) {
- return false;
- }
-
- var textContentLength = node.textContent.trim().length;
- if (textContentLength < 140) {
- return false;
- }
-
- score += Math.sqrt(textContentLength - 140);
-
- if (score > 20) {
- return true;
- }
- return false;
- });
- },
-
/**
* Runs readability.
*
diff --git a/toolkit/components/reader/ReaderMode.jsm b/toolkit/components/reader/ReaderMode.jsm
index f147d5a95520..134ca7a6b1af 100644
--- a/toolkit/components/reader/ReaderMode.jsm
+++ b/toolkit/components/reader/ReaderMode.jsm
@@ -41,13 +41,7 @@ ChromeUtils.defineModuleGetter(this, "EventDispatcher", "resource://gre/modules/
ChromeUtils.defineModuleGetter(this, "OS", "resource://gre/modules/osfile.jsm");
ChromeUtils.defineModuleGetter(this, "ReaderWorker", "resource://gre/modules/reader/ReaderWorker.jsm");
ChromeUtils.defineModuleGetter(this, "LanguageDetector", "resource:///modules/translation/LanguageDetector.jsm");
-
-XPCOMUtils.defineLazyGetter(this, "Readability", function() {
- let scope = {};
- scope.dump = this.dump;
- Services.scriptloader.loadSubScript("resource://gre/modules/reader/Readability.js", scope);
- return scope.Readability;
-});
+ChromeUtils.defineModuleGetter(this, "Readerable", "resource://gre/modules/Readerable.jsm");
const gIsFirefoxDesktop = Services.appinfo.ID == "{ec8030f7-c20a-464f-9b0e-13a3a9e97384}";
@@ -57,42 +51,6 @@ var ReaderMode = {
DEBUG: 0,
- // Don't try to parse the page if it has too many elements (for memory and
- // performance reasons)
- get maxElemsToParse() {
- delete this.parseNodeLimit;
-
- Services.prefs.addObserver("reader.parse-node-limit", this);
- return this.parseNodeLimit = Services.prefs.getIntPref("reader.parse-node-limit");
- },
-
- get isEnabledForParseOnLoad() {
- delete this.isEnabledForParseOnLoad;
-
- // Listen for future pref changes.
- Services.prefs.addObserver("reader.parse-on-load.", this);
-
- return this.isEnabledForParseOnLoad = this._getStateForParseOnLoad();
- },
-
- _getStateForParseOnLoad() {
- let isEnabled = Services.prefs.getBoolPref("reader.parse-on-load.enabled");
- let isForceEnabled = Services.prefs.getBoolPref("reader.parse-on-load.force-enabled");
- return isForceEnabled || isEnabled;
- },
-
- observe(aMessage, aTopic, aData) {
- switch (aTopic) {
- case "nsPref:changed":
- if (aData.startsWith("reader.parse-on-load.")) {
- this.isEnabledForParseOnLoad = this._getStateForParseOnLoad();
- } else if (aData === "reader.parse-node-limit") {
- this.parseNodeLimit = Services.prefs.getIntPref(aData);
- }
- break;
- }
- },
-
/**
* Enter the reader mode by going forward one step in history if applicable,
* if not, append the about:reader page in the history instead.
@@ -197,39 +155,6 @@ var ReaderMode = {
return null;
},
- /**
- * Decides whether or not a document is reader-able without parsing the whole thing.
- *
- * @param doc A document to parse.
- * @return boolean Whether or not we should show the reader mode button.
- */
- isProbablyReaderable(doc) {
- // Only care about 'real' HTML documents:
- if (doc.mozSyntheticDocument || !(doc instanceof doc.defaultView.HTMLDocument)) {
- return false;
- }
-
- let uri = Services.io.newURI(doc.location.href);
- if (!this._shouldCheckUri(uri)) {
- return false;
- }
-
- let utils = this.getUtilsForWin(doc.defaultView);
- // We pass in a helper function to determine if a node is visible, because
- // it uses gecko APIs that the engine-agnostic readability code can't rely
- // upon.
- return new Readability(doc).isProbablyReaderable(this.isNodeVisible.bind(this, utils));
- },
-
- isNodeVisible(utils, node) {
- let bounds = utils.getBoundsWithoutFlushing(node);
- return bounds.height > 0 && bounds.width > 0;
- },
-
- getUtilsForWin(win) {
- return win.windowUtils;
- },
-
/**
* Gets an article from a loaded browser's document. This method will not attempt
* to parse certain URIs (e.g. about: URIs).
@@ -239,7 +164,8 @@ var ReaderMode = {
* @resolves JS object representing the article, or null if no article is found.
*/
parseDocument(doc) {
- if (!this._shouldCheckUri(doc.documentURIObject) || !this._shouldCheckUri(doc.baseURIObject, true)) {
+ if (!Readerable.shouldCheckUri(doc.documentURIObject) ||
+ !Readerable.shouldCheckUri(doc.baseURIObject, true)) {
this.log("Reader mode disabled for URI");
return null;
}
@@ -259,7 +185,8 @@ var ReaderMode = {
if (!doc) {
return null;
}
- if (!this._shouldCheckUri(doc.documentURIObject) || !this._shouldCheckUri(doc.baseURIObject, true)) {
+ if (!Readerable.shouldCheckUri(doc.documentURIObject) ||
+ !Readerable.shouldCheckUri(doc.baseURIObject, true)) {
this.log("Reader mode disabled for URI");
return null;
}
@@ -269,7 +196,7 @@ var ReaderMode = {
_downloadDocument(url) {
try {
- if (!this._shouldCheckUri(Services.io.newURI(url))) {
+ if (!Readerable.shouldCheckUri(Services.io.newURI(url))) {
return null;
}
} catch (ex) {
@@ -415,42 +342,6 @@ var ReaderMode = {
dump("Reader: " + msg);
},
- _blockedHosts: [
- "amazon.com",
- "github.com",
- "mail.google.com",
- "pinterest.com",
- "reddit.com",
- "twitter.com",
- "youtube.com",
- ],
-
- _shouldCheckUri(uri, isBaseUri = false) {
- if (!(uri.schemeIs("http") || uri.schemeIs("https"))) {
- this.log("Not parsing URI scheme: " + uri.scheme);
- return false;
- }
-
- try {
- uri.QueryInterface(Ci.nsIURL);
- } catch (ex) {
- // If this doesn't work, presumably the URL is not well-formed or something
- return false;
- }
- // Sadly, some high-profile pages have false positives, so bail early for those:
- let asciiHost = uri.asciiHost;
- if (!isBaseUri && this._blockedHosts.some(blockedHost => asciiHost.endsWith(blockedHost))) {
- return false;
- }
-
- if (!isBaseUri && (!uri.filePath || uri.filePath == "/")) {
- this.log("Not parsing home page: " + uri.spec);
- return false;
- }
-
- return true;
- },
-
/**
* Attempts to parse a document into an article. Heavy lifting happens
* in readerWorker.js.
@@ -641,3 +532,6 @@ var ReaderMode = {
return readingSpeed.get(lang) || readingSpeed.get("en");
},
};
+
+XPCOMUtils.defineLazyPreferenceGetter(
+ ReaderMode, "maxElemsToParse", "reader.parse-node-limit", 0);
diff --git a/toolkit/components/reader/Readerable.js b/toolkit/components/reader/Readerable.js
new file mode 100644
index 000000000000..509e53d3f761
--- /dev/null
+++ b/toolkit/components/reader/Readerable.js
@@ -0,0 +1,79 @@
+// -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+"use strict";
+
+// This file and Readability-readerable.js are merged together into
+// Readerable.jsm.
+
+/* exported Readerable */
+/* import-globals-from Readability-readerable.js */
+
+ChromeUtils.import("resource://gre/modules/Services.jsm");
+ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm");
+
+function isNodeVisible(node) {
+ return node.clientHeight > 0 && node.clientWidth > 0;
+}
+
+var Readerable = {
+ get isEnabledForParseOnLoad() {
+ return this.isEnabled || this.isForceEnabled;
+ },
+
+ /**
+ * Decides whether or not a document is reader-able without parsing the whole thing.
+ *
+ * @param doc A document to parse.
+ * @return boolean Whether or not we should show the reader mode button.
+ */
+ isProbablyReaderable(doc) {
+ // Only care about 'real' HTML documents:
+ if (doc.mozSyntheticDocument || !(doc instanceof doc.defaultView.HTMLDocument)) {
+ return false;
+ }
+
+ let uri = Services.io.newURI(doc.location.href);
+ if (!this.shouldCheckUri(uri)) {
+ return false;
+ }
+
+ return isProbablyReaderable(doc, isNodeVisible);
+ },
+
+ _blockedHosts: [
+ "amazon.com",
+ "github.com",
+ "mail.google.com",
+ "pinterest.com",
+ "reddit.com",
+ "twitter.com",
+ "youtube.com",
+ ],
+
+ shouldCheckUri(uri, isBaseUri = false) {
+ if (!["http", "https"].includes(uri.scheme)) {
+ return false;
+ }
+
+ if (!isBaseUri) {
+ // Sadly, some high-profile pages have false positives, so bail early for those:
+ let {host} = uri;
+ if (this._blockedHosts.some(blockedHost => host.endsWith(blockedHost))) {
+ return false;
+ }
+
+ if (uri.filePath == "/") {
+ return false;
+ }
+ }
+
+ return true;
+ },
+};
+
+XPCOMUtils.defineLazyPreferenceGetter(
+ Readerable, "isEnabled", "reader.parse-on-load.enabled", true);
+XPCOMUtils.defineLazyPreferenceGetter(
+ Readerable, "isForceEnabled", "reader.parse-on-load.force-enabled", false);
diff --git a/toolkit/components/reader/Readerable.jsm b/toolkit/components/reader/Readerable.jsm
new file mode 100644
index 000000000000..2268487e4296
--- /dev/null
+++ b/toolkit/components/reader/Readerable.jsm
@@ -0,0 +1,10 @@
+// -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+"use strict";
+
+var EXPORTED_SYMBOLS = ["Readerable"];
+
+#include Readability-readerable.js
+#include Readerable.js
diff --git a/toolkit/components/reader/moz.build b/toolkit/components/reader/moz.build
index 6863d6542747..c2364ce8deb4 100644
--- a/toolkit/components/reader/moz.build
+++ b/toolkit/components/reader/moz.build
@@ -8,14 +8,18 @@ JAR_MANIFESTS += ['jar.mn']
EXTRA_JS_MODULES += [
'AboutReader.jsm',
- 'ReaderMode.jsm'
+ 'ReaderMode.jsm',
+]
+
+EXTRA_PP_JS_MODULES += [
+ 'Readerable.jsm',
]
EXTRA_JS_MODULES.reader = [
'JSDOMParser.js',
'Readability.js',
'ReaderWorker.js',
- 'ReaderWorker.jsm'
+ 'ReaderWorker.jsm',
]
BROWSER_CHROME_MANIFESTS += [