forked from mirrors/gecko-dev
		
	
		
			
				
	
	
		
			561 lines
		
	
	
	
		
			16 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			561 lines
		
	
	
	
		
			16 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| // -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
 | |
| /* This Source Code Form is subject to the terms of the Mozilla Public
 | |
|  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 | |
|  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 | |
| 
 | |
| // Constants for telemetry.
 | |
| const DOWNLOAD_SUCCESS = 0;
 | |
| const DOWNLOAD_ERROR_XHR = 1;
 | |
| const DOWNLOAD_ERROR_NO_DOC = 2;
 | |
| 
 | |
| const PARSE_SUCCESS = 0;
 | |
| const PARSE_ERROR_TOO_MANY_ELEMENTS = 1;
 | |
| const PARSE_ERROR_WORKER = 2;
 | |
| const PARSE_ERROR_NO_ARTICLE = 3;
 | |
| 
 | |
| // Class names to preserve in the readerized output. We preserve these class
 | |
| // names so that rules in aboutReader.css can match them.
 | |
| const CLASSES_TO_PRESERVE = [
 | |
|   "caption",
 | |
|   "emoji",
 | |
|   "hidden",
 | |
|   "invisible",
 | |
|   "sr-only",
 | |
|   "visually-hidden",
 | |
|   "visuallyhidden",
 | |
|   "wp-caption",
 | |
|   "wp-caption-text",
 | |
|   "wp-smiley",
 | |
| ];
 | |
| 
 | |
| import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
 | |
| 
 | |
| const lazy = {};
 | |
| 
 | |
| ChromeUtils.defineESModuleGetters(lazy, {
 | |
|   LanguageDetector:
 | |
|     "resource://gre/modules/translation/LanguageDetector.sys.mjs",
 | |
|   ReaderWorker: "resource://gre/modules/reader/ReaderWorker.sys.mjs",
 | |
|   Readerable: "resource://gre/modules/Readerable.sys.mjs",
 | |
| });
 | |
| 
 | |
| const gIsFirefoxDesktop =
 | |
|   Services.appinfo.ID == "{ec8030f7-c20a-464f-9b0e-13a3a9e97384}";
 | |
| 
 | |
| Services.telemetry.setEventRecordingEnabled("readermode", true);
 | |
| 
 | |
| export var ReaderMode = {
 | |
|   DEBUG: 0,
 | |
| 
 | |
|   // For time spent telemetry
 | |
|   enterTime: undefined,
 | |
|   leaveTime: undefined,
 | |
| 
 | |
|   /**
 | |
|    * Enter the reader mode by going forward one step in history if applicable,
 | |
|    * if not, append the about:reader page in the history instead.
 | |
|    */
 | |
|   enterReaderMode(docShell, win) {
 | |
|     this.enterTime = Date.now();
 | |
| 
 | |
|     Services.telemetry.recordEvent("readermode", "view", "on", null, {
 | |
|       subcategory: "feature",
 | |
|     });
 | |
| 
 | |
|     let url = win.document.location.href;
 | |
|     let readerURL = "about:reader?url=" + encodeURIComponent(url);
 | |
| 
 | |
|     if (!Services.appinfo.sessionHistoryInParent) {
 | |
|       let webNav = docShell.QueryInterface(Ci.nsIWebNavigation);
 | |
|       let sh = webNav.sessionHistory;
 | |
|       if (webNav.canGoForward) {
 | |
|         let forwardEntry = sh.legacySHistory.getEntryAtIndex(sh.index + 1);
 | |
|         let forwardURL = forwardEntry.URI.spec;
 | |
|         if (forwardURL && (forwardURL == readerURL || !readerURL)) {
 | |
|           webNav.goForward();
 | |
|           return;
 | |
|         }
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     // This could possibly move to the parent. See bug 1664982.
 | |
|     win.document.location = readerURL;
 | |
|   },
 | |
| 
 | |
|   /**
 | |
|    * Exit the reader mode by going back one step in history if applicable,
 | |
|    * if not, append the original page in the history instead.
 | |
|    */
 | |
|   leaveReaderMode(docShell, win) {
 | |
|     this.leaveTime = Date.now();
 | |
| 
 | |
|     // Measured in seconds (whole number)
 | |
|     let timeSpentInReaderMode = Math.floor(
 | |
|       (this.leaveTime - this.enterTime) / 1000
 | |
|     );
 | |
| 
 | |
|     // Measured as percentage (whole number)
 | |
|     let scrollPosition = Math.floor(
 | |
|       ((win.scrollY + win.innerHeight) / win.document.body.clientHeight) * 100
 | |
|     );
 | |
| 
 | |
|     Services.telemetry.recordEvent("readermode", "view", "off", null, {
 | |
|       subcategory: "feature",
 | |
|       reader_time: `${timeSpentInReaderMode}`,
 | |
|       scroll_position: `${scrollPosition}`,
 | |
|     });
 | |
| 
 | |
|     let url = win.document.location.href;
 | |
|     let originalURL = this.getOriginalUrl(url);
 | |
|     let webNav = docShell.QueryInterface(Ci.nsIWebNavigation);
 | |
| 
 | |
|     if (!Services.appinfo.sessionHistoryInParent) {
 | |
|       let sh = webNav.sessionHistory;
 | |
|       if (webNav.canGoBack) {
 | |
|         let prevEntry = sh.legacySHistory.getEntryAtIndex(sh.index - 1);
 | |
|         let prevURL = prevEntry.URI.spec;
 | |
|         if (prevURL && (prevURL == originalURL || !originalURL)) {
 | |
|           webNav.goBack();
 | |
|           return;
 | |
|         }
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     let referrerURI, principal;
 | |
|     try {
 | |
|       referrerURI = Services.io.newURI(url);
 | |
|       principal = Services.scriptSecurityManager.createContentPrincipal(
 | |
|         referrerURI,
 | |
|         win.document.nodePrincipal.originAttributes
 | |
|       );
 | |
|     } catch (e) {
 | |
|       console.error(e);
 | |
|       return;
 | |
|     }
 | |
|     let loadFlags = webNav.LOAD_FLAGS_DISALLOW_INHERIT_PRINCIPAL;
 | |
|     let ReferrerInfo = Components.Constructor(
 | |
|       "@mozilla.org/referrer-info;1",
 | |
|       "nsIReferrerInfo",
 | |
|       "init"
 | |
|     );
 | |
|     let loadURIOptions = {
 | |
|       triggeringPrincipal: principal,
 | |
|       loadFlags,
 | |
|       referrerInfo: new ReferrerInfo(
 | |
|         Ci.nsIReferrerInfo.EMPTY,
 | |
|         true,
 | |
|         referrerURI
 | |
|       ),
 | |
|     };
 | |
|     // This could possibly move to the parent. See bug 1664982.
 | |
|     webNav.fixupAndLoadURIString(originalURL, loadURIOptions);
 | |
|   },
 | |
| 
 | |
|   /**
 | |
|    * Returns original URL from an about:reader URL.
 | |
|    *
 | |
|    * @param url An about:reader URL.
 | |
|    * @return The original URL for the article, or null if we did not find
 | |
|    *         a properly formatted about:reader URL.
 | |
|    */
 | |
|   getOriginalUrl(url) {
 | |
|     if (!url.startsWith("about:reader?")) {
 | |
|       return null;
 | |
|     }
 | |
| 
 | |
|     let outerHash = "";
 | |
|     try {
 | |
|       let uriObj = Services.io.newURI(url);
 | |
|       url = uriObj.specIgnoringRef;
 | |
|       outerHash = uriObj.ref;
 | |
|     } catch (ex) {
 | |
|       /* ignore, use the raw string */
 | |
|     }
 | |
| 
 | |
|     let searchParams = new URLSearchParams(
 | |
|       url.substring("about:reader?".length)
 | |
|     );
 | |
|     if (!searchParams.has("url")) {
 | |
|       return null;
 | |
|     }
 | |
|     let originalUrl = searchParams.get("url");
 | |
|     if (outerHash) {
 | |
|       try {
 | |
|         let uriObj = Services.io.newURI(originalUrl);
 | |
|         uriObj = Services.io.newURI("#" + outerHash, null, uriObj);
 | |
|         originalUrl = uriObj.spec;
 | |
|       } catch (ex) {}
 | |
|     }
 | |
|     return originalUrl;
 | |
|   },
 | |
| 
 | |
|   getOriginalUrlObjectForDisplay(url) {
 | |
|     let originalUrl = this.getOriginalUrl(url);
 | |
|     if (originalUrl) {
 | |
|       let uriObj;
 | |
|       try {
 | |
|         uriObj = Services.uriFixup.getFixupURIInfo(originalUrl).preferredURI;
 | |
|       } catch (ex) {
 | |
|         return null;
 | |
|       }
 | |
|       try {
 | |
|         return Services.io.createExposableURI(uriObj);
 | |
|       } catch (ex) {
 | |
|         return null;
 | |
|       }
 | |
|     }
 | |
|     return null;
 | |
|   },
 | |
| 
 | |
|   /**
 | |
|    * Gets an article from a loaded browser's document. This method will not attempt
 | |
|    * to parse certain URIs (e.g. about: URIs).
 | |
|    *
 | |
|    * @param doc A document to parse.
 | |
|    * @return {Promise}
 | |
|    * @resolves JS object representing the article, or null if no article is found.
 | |
|    */
 | |
|   parseDocument(doc) {
 | |
|     if (
 | |
|       !lazy.Readerable.shouldCheckUri(doc.documentURIObject) ||
 | |
|       !lazy.Readerable.shouldCheckUri(doc.baseURIObject, true)
 | |
|     ) {
 | |
|       this.log("Reader mode disabled for URI");
 | |
|       return null;
 | |
|     }
 | |
| 
 | |
|     return this._readerParse(doc);
 | |
|   },
 | |
| 
 | |
|   /**
 | |
|    * Downloads and parses a document from a URL.
 | |
|    *
 | |
|    * @param url URL to download and parse.
 | |
|    * @return {Promise}
 | |
|    * @resolves JS object representing the article, or null if no article is found.
 | |
|    */
 | |
|   async downloadAndParseDocument(url, docContentType = "document") {
 | |
|     let result = await this._downloadDocument(url, docContentType);
 | |
|     if (!result?.doc) {
 | |
|       return null;
 | |
|     }
 | |
|     let { doc, newURL } = result;
 | |
|     if (
 | |
|       !lazy.Readerable.shouldCheckUri(doc.documentURIObject) ||
 | |
|       !lazy.Readerable.shouldCheckUri(doc.baseURIObject, true)
 | |
|     ) {
 | |
|       this.log("Reader mode disabled for URI");
 | |
|       return null;
 | |
|     }
 | |
| 
 | |
|     let article = await this._readerParse(doc);
 | |
|     // If we have to redirect, reject to the caller with the parsed article,
 | |
|     // so we can update the URL before displaying it.
 | |
|     if (newURL) {
 | |
|       return Promise.reject({ newURL, article });
 | |
|     }
 | |
|     // Otherwise, we can just continue with the article.
 | |
|     return article;
 | |
|   },
 | |
| 
 | |
|   _downloadDocument(url, docContentType = "document") {
 | |
|     try {
 | |
|       if (!lazy.Readerable.shouldCheckUri(Services.io.newURI(url))) {
 | |
|         return null;
 | |
|       }
 | |
|     } catch (ex) {
 | |
|       console.error(
 | |
|         new Error(`Couldn't create URI from ${url} to download: ${ex}`)
 | |
|       );
 | |
|       return null;
 | |
|     }
 | |
|     let histogram = Services.telemetry.getHistogramById(
 | |
|       "READER_MODE_DOWNLOAD_RESULT"
 | |
|     );
 | |
|     return new Promise((resolve, reject) => {
 | |
|       let xhr = new XMLHttpRequest();
 | |
|       xhr.open("GET", url, true);
 | |
|       xhr.onerror = evt => reject(evt.error);
 | |
|       xhr.responseType = docContentType === "text/plain" ? "text" : "document";
 | |
|       xhr.onload = evt => {
 | |
|         if (xhr.status !== 200) {
 | |
|           reject("Reader mode XHR failed with status: " + xhr.status);
 | |
|           histogram.add(DOWNLOAD_ERROR_XHR);
 | |
|           return;
 | |
|         }
 | |
| 
 | |
|         let doc =
 | |
|           xhr.responseType === "text" ? xhr.responseText : xhr.responseXML;
 | |
|         if (!doc) {
 | |
|           reject("Reader mode XHR didn't return a document");
 | |
|           histogram.add(DOWNLOAD_ERROR_NO_DOC);
 | |
|           return;
 | |
|         }
 | |
| 
 | |
|         let responseURL = xhr.responseURL;
 | |
|         let givenURL = url;
 | |
|         // Convert these to real URIs to make sure the escaping (or lack
 | |
|         // thereof) is identical:
 | |
|         try {
 | |
|           responseURL = Services.io.newURI(responseURL).specIgnoringRef;
 | |
|         } catch (ex) {
 | |
|           /* Ignore errors - we'll use what we had before */
 | |
|         }
 | |
|         try {
 | |
|           givenURL = Services.io.newURI(givenURL).specIgnoringRef;
 | |
|         } catch (ex) {
 | |
|           /* Ignore errors - we'll use what we had before */
 | |
|         }
 | |
| 
 | |
|         if (xhr.responseType != "document") {
 | |
|           let initialText = doc;
 | |
|           let parser = new DOMParser();
 | |
|           doc = parser.parseFromString(`<pre></pre>`, "text/html");
 | |
|           doc.querySelector("pre").textContent = initialText;
 | |
|         }
 | |
| 
 | |
|         // We treat redirects as download successes here:
 | |
|         histogram.add(DOWNLOAD_SUCCESS);
 | |
| 
 | |
|         let result = { doc };
 | |
|         if (responseURL != givenURL) {
 | |
|           result.newURL = xhr.responseURL;
 | |
|         }
 | |
| 
 | |
|         resolve(result);
 | |
|       };
 | |
|       xhr.send();
 | |
|     });
 | |
|   },
 | |
| 
 | |
|   log(msg) {
 | |
|     if (this.DEBUG) {
 | |
|       dump("Reader: " + msg);
 | |
|     }
 | |
|   },
 | |
| 
 | |
|   /**
 | |
|    * Attempts to parse a document into an article. Heavy lifting happens
 | |
|    * in Reader.worker.js.
 | |
|    *
 | |
|    * @param doc The document to parse.
 | |
|    * @return {Promise}
 | |
|    * @resolves JS object representing the article, or null if no article is found.
 | |
|    */
 | |
|   async _readerParse(doc) {
 | |
|     let histogram = Services.telemetry.getHistogramById(
 | |
|       "READER_MODE_PARSE_RESULT"
 | |
|     );
 | |
|     if (this.parseNodeLimit) {
 | |
|       let numTags = doc.getElementsByTagName("*").length;
 | |
|       if (numTags > this.parseNodeLimit) {
 | |
|         this.log(
 | |
|           "Aborting parse for " +
 | |
|             doc.baseURIObject.spec +
 | |
|             "; " +
 | |
|             numTags +
 | |
|             " elements found"
 | |
|         );
 | |
|         histogram.add(PARSE_ERROR_TOO_MANY_ELEMENTS);
 | |
|         return null;
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     // Fetch this here before we send `doc` off to the worker thread, as later on the
 | |
|     // document might be nuked but we will still want the URI.
 | |
|     let { documentURI } = doc;
 | |
| 
 | |
|     let uriParam;
 | |
|     uriParam = {
 | |
|       spec: doc.baseURIObject.spec,
 | |
|       prePath: doc.baseURIObject.prePath,
 | |
|       scheme: doc.baseURIObject.scheme,
 | |
| 
 | |
|       // Fallback
 | |
|       host: documentURI,
 | |
|       pathBase: documentURI,
 | |
|     };
 | |
| 
 | |
|     // nsIURI.host throws an exception if a host doesn't exist.
 | |
|     try {
 | |
|       uriParam.host = doc.baseURIObject.host;
 | |
|       uriParam.pathBase = Services.io.newURI(".", null, doc.baseURIObject).spec;
 | |
|     } catch (ex) {
 | |
|       // Fall back to the initial values we assigned.
 | |
|       console.warn("Error accessing host name: ", ex);
 | |
|     }
 | |
| 
 | |
|     // convert text/plain document, if any, to XHTML format
 | |
|     if (this._isDocumentPlainText(doc)) {
 | |
|       doc = this._convertPlainTextDocument(doc);
 | |
|     }
 | |
| 
 | |
|     let serializer = new XMLSerializer();
 | |
|     let serializedDoc = serializer.serializeToString(doc);
 | |
|     // Explicitly null out doc to make it clear it might not be available from this
 | |
|     // point on.
 | |
|     doc = null;
 | |
| 
 | |
|     let options = {
 | |
|       classesToPreserve: CLASSES_TO_PRESERVE,
 | |
|     };
 | |
| 
 | |
|     let article = null;
 | |
|     try {
 | |
|       article = await lazy.ReaderWorker.post("parseDocument", [
 | |
|         uriParam,
 | |
|         serializedDoc,
 | |
|         options,
 | |
|       ]);
 | |
|     } catch (e) {
 | |
|       console.error("Error in ReaderWorker: ", e);
 | |
|       histogram.add(PARSE_ERROR_WORKER);
 | |
|     }
 | |
| 
 | |
|     if (!article) {
 | |
|       this.log("Worker did not return an article");
 | |
|       histogram.add(PARSE_ERROR_NO_ARTICLE);
 | |
|       return null;
 | |
|     }
 | |
| 
 | |
|     // Readability returns a URI object based on the baseURI, but we only care
 | |
|     // about the original document's URL from now on. This also avoids spoofing
 | |
|     // attempts where the baseURI doesn't match the domain of the documentURI
 | |
|     article.url = documentURI;
 | |
|     delete article.uri;
 | |
| 
 | |
|     let flags =
 | |
|       Ci.nsIDocumentEncoder.OutputSelectionOnly |
 | |
|       Ci.nsIDocumentEncoder.OutputAbsoluteLinks;
 | |
|     article.title = Cc["@mozilla.org/parserutils;1"]
 | |
|       .getService(Ci.nsIParserUtils)
 | |
|       .convertToPlainText(article.title, flags, 0);
 | |
|     if (gIsFirefoxDesktop) {
 | |
|       await this._assignLanguage(article);
 | |
|       this._maybeAssignTextDirection(article);
 | |
|     }
 | |
| 
 | |
|     this._assignReadTime(article);
 | |
| 
 | |
|     histogram.add(PARSE_SUCCESS);
 | |
|     return article;
 | |
|   },
 | |
| 
 | |
|   /**
 | |
|    * Sets a global language string value if the result is confident
 | |
|    *
 | |
|    * @return Promise
 | |
|    * @resolves when the language is detected
 | |
|    */
 | |
|   _assignLanguage(article) {
 | |
|     return lazy.LanguageDetector.detectLanguage(article.textContent).then(
 | |
|       result => {
 | |
|         article.language = result.confident ? result.language : null;
 | |
|       }
 | |
|     );
 | |
|   },
 | |
| 
 | |
|   _maybeAssignTextDirection(article) {
 | |
|     // TODO: Remove the hardcoded language codes below once bug 1320265 is resolved.
 | |
|     if (
 | |
|       !article.dir &&
 | |
|       ["ar", "fa", "he", "ug", "ur"].includes(article.language)
 | |
|     ) {
 | |
|       article.dir = "rtl";
 | |
|     }
 | |
|   },
 | |
| 
 | |
|   /**
 | |
|    * Assigns the estimated reading time range of the article to the article object.
 | |
|    *
 | |
|    * @param article the article object to assign the reading time estimate to.
 | |
|    */
 | |
|   _assignReadTime(article) {
 | |
|     let lang = article.language || "en";
 | |
|     const readingSpeed = this._getReadingSpeedForLanguage(lang);
 | |
|     const charactersPerMinuteLow = readingSpeed.cpm - readingSpeed.variance;
 | |
|     const charactersPerMinuteHigh = readingSpeed.cpm + readingSpeed.variance;
 | |
|     const length = article.length;
 | |
| 
 | |
|     article.readingTimeMinsSlow = Math.ceil(length / charactersPerMinuteLow);
 | |
|     article.readingTimeMinsFast = Math.ceil(length / charactersPerMinuteHigh);
 | |
|   },
 | |
| 
 | |
|   /**
 | |
|    * Returns the reading speed of a selection of languages with likely variance.
 | |
|    *
 | |
|    * Reading speed estimated from a study done on reading speeds in various languages.
 | |
|    * study can be found here: http://iovs.arvojournals.org/article.aspx?articleid=2166061
 | |
|    *
 | |
|    * @return object with characters per minute and variance. Defaults to English
 | |
|    *         if no suitable language is found in the collection.
 | |
|    */
 | |
|   _getReadingSpeedForLanguage(lang) {
 | |
|     const readingSpeed = new Map([
 | |
|       ["en", { cpm: 987, variance: 118 }],
 | |
|       ["ar", { cpm: 612, variance: 88 }],
 | |
|       ["de", { cpm: 920, variance: 86 }],
 | |
|       ["es", { cpm: 1025, variance: 127 }],
 | |
|       ["fi", { cpm: 1078, variance: 121 }],
 | |
|       ["fr", { cpm: 998, variance: 126 }],
 | |
|       ["he", { cpm: 833, variance: 130 }],
 | |
|       ["it", { cpm: 950, variance: 140 }],
 | |
|       ["jw", { cpm: 357, variance: 56 }],
 | |
|       ["nl", { cpm: 978, variance: 143 }],
 | |
|       ["pl", { cpm: 916, variance: 126 }],
 | |
|       ["pt", { cpm: 913, variance: 145 }],
 | |
|       ["ru", { cpm: 986, variance: 175 }],
 | |
|       ["sk", { cpm: 885, variance: 145 }],
 | |
|       ["sv", { cpm: 917, variance: 156 }],
 | |
|       ["tr", { cpm: 1054, variance: 156 }],
 | |
|       ["zh", { cpm: 255, variance: 29 }],
 | |
|     ]);
 | |
| 
 | |
|     return readingSpeed.get(lang) || readingSpeed.get("en");
 | |
|   },
 | |
|   /**
 | |
|    *
 | |
|    * Check if the document to be parsed is text document.
 | |
|    * @param doc the doc object to be parsed.
 | |
|    * @return boolean
 | |
|    *
 | |
|    */
 | |
|   _isDocumentPlainText(doc) {
 | |
|     return doc.contentType == "text/plain";
 | |
|   },
 | |
|   /**
 | |
|    *
 | |
|    * The document to be parsed is text document and is converted to HTML format.
 | |
|    * @param doc the doc object to be parsed.
 | |
|    * @return doc
 | |
|    *
 | |
|    */
 | |
|   _convertPlainTextDocument(doc) {
 | |
|     let preTag = doc.querySelector("pre");
 | |
|     let docFrag = doc.createDocumentFragment();
 | |
|     let content = preTag.textContent;
 | |
|     let paragraphs = content.split(/\r?\n\r?\n/);
 | |
|     for (let para of paragraphs) {
 | |
|       let pElem = doc.createElement("p");
 | |
|       let lines = para.split(/\n/);
 | |
|       for (let line of lines) {
 | |
|         pElem.append(line);
 | |
|         let brElem = doc.createElement("br");
 | |
|         pElem.append(brElem);
 | |
|       }
 | |
|       docFrag.append(pElem);
 | |
|     }
 | |
|     // Clone the document to avoid the original document being affected
 | |
|     // (which shows up when exiting reader mode again).
 | |
|     let clone = doc.documentElement.cloneNode(true);
 | |
|     clone.querySelector("pre").replaceWith(docFrag);
 | |
|     return clone;
 | |
|   },
 | |
| };
 | |
| 
 | |
| XPCOMUtils.defineLazyPreferenceGetter(
 | |
|   ReaderMode,
 | |
|   "maxElemsToParse",
 | |
|   "reader.parse-node-limit",
 | |
|   0
 | |
| );
 | 
