diff --git a/browser/actors/PageMetadataChild.jsm b/browser/actors/PageMetadataChild.jsm deleted file mode 100644 index ff1512dd443e..000000000000 --- a/browser/actors/PageMetadataChild.jsm +++ /dev/null @@ -1,33 +0,0 @@ -/* vim: set ts=2 sw=2 sts=2 et tw=80: */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -"use strict"; - -var EXPORTED_SYMBOLS = ["PageMetadataChild"]; - -ChromeUtils.import("resource://gre/actors/ActorChild.jsm"); - -ChromeUtils.defineModuleGetter(this, "ContextMenuChild", - "resource:///modules/ContextMenuChild.jsm"); -ChromeUtils.defineModuleGetter(this, "PageMetadata", - "resource://gre/modules/PageMetadata.jsm"); - -class PageMetadataChild extends ActorChild { - receiveMessage(message) { - switch (message.name) { - case "PageMetadata:GetPageData": { - let target = ContextMenuChild.getTarget(this.mm, message); - let result = PageMetadata.getData(this.content.document, target); - this.mm.sendAsyncMessage("PageMetadata:PageDataResult", result); - break; - } - case "PageMetadata:GetMicroformats": { - let target = ContextMenuChild.getTarget(this.mm, message); - let result = PageMetadata.getMicroformats(this.content.document, target); - this.mm.sendAsyncMessage("PageMetadata:MicroformatsResult", result); - break; - } - } - } -} diff --git a/browser/actors/moz.build b/browser/actors/moz.build index 16ccd3f4fa1b..e84a2a8e7af1 100644 --- a/browser/actors/moz.build +++ b/browser/actors/moz.build @@ -40,7 +40,6 @@ FINAL_TARGET_FILES.actors += [ 'NetErrorChild.jsm', 'OfflineAppsChild.jsm', 'PageInfoChild.jsm', - 'PageMetadataChild.jsm', 'PageStyleChild.jsm', 'PluginChild.jsm', 'SearchTelemetryChild.jsm', diff --git a/browser/base/content/test/general/browser.ini b/browser/base/content/test/general/browser.ini index b7cfce89d6c7..19657834160a 100644 --- a/browser/base/content/test/general/browser.ini +++ b/browser/base/content/test/general/browser.ini @@ -349,8 +349,6 @@ skip-if = os == 'win' # Bug 1384127 # DO NOT ADD MORE TESTS HERE. USE A TOPICAL DIRECTORY INSTEAD. [browser_private_no_prompt.js] # DO NOT ADD MORE TESTS HERE. USE A TOPICAL DIRECTORY INSTEAD. -[browser_PageMetaData_pushstate.js] -# DO NOT ADD MORE TESTS HERE. USE A TOPICAL DIRECTORY INSTEAD. [browser_refreshBlocker.js] support-files = refresh_header.sjs diff --git a/browser/base/content/test/general/browser_PageMetaData_pushstate.js b/browser/base/content/test/general/browser_PageMetaData_pushstate.js deleted file mode 100644 index b8ca23eee3da..000000000000 --- a/browser/base/content/test/general/browser_PageMetaData_pushstate.js +++ /dev/null @@ -1,31 +0,0 @@ -/* Any copyright is dedicated to the Public Domain. - * http://creativecommons.org/publicdomain/zero/1.0/ - */ - -add_task(async function() { - let rooturi = "https://example.com/browser/toolkit/modules/tests/browser/"; - await BrowserTestUtils.openNewForegroundTab(gBrowser, rooturi + "metadata_simple.html"); - await ContentTask.spawn(gBrowser.selectedBrowser, { rooturi }, async function(args) { - ChromeUtils.import("resource://gre/modules/PageMetadata.jsm"); - - let result = PageMetadata.getData(content.document); - // Result should have description. - Assert.equal(result.url, args.rooturi + "metadata_simple.html", "metadata url is correct"); - Assert.equal(result.title, "Test Title", "metadata title is correct"); - Assert.equal(result.description, "A very simple test page", "description is correct"); - - content.history.pushState({}, "2", "2.html"); - result = PageMetadata.getData(content.document); - // Result should not have description. - Assert.equal(result.url, args.rooturi + "2.html", "metadata url is correct"); - Assert.equal(result.title, "Test Title", "metadata title is correct"); - Assert.ok(!result.description, "description is undefined"); - - Assert.equal(content.document.documentURI, args.rooturi + "2.html", - "content.document has correct url"); - }); - - is(gBrowser.currentURI.spec, rooturi + "2.html", "gBrowser has correct url"); - - gBrowser.removeTab(gBrowser.selectedTab); -}); diff --git a/browser/components/nsBrowserGlue.js b/browser/components/nsBrowserGlue.js index 7b1040a019e8..891a8caabbf7 100644 --- a/browser/components/nsBrowserGlue.js +++ b/browser/components/nsBrowserGlue.js @@ -202,16 +202,6 @@ let ACTORS = { }, }, - PageMetadata: { - child: { - module: "resource:///actors/PageMetadataChild.jsm", - messages: [ - "PageMetadata:GetPageData", - "PageMetadata:GetMicroformats", - ], - }, - }, - PageStyle: { child: { module: "resource:///actors/PageStyleChild.jsm", diff --git a/devtools/client/responsive.html/browser/tunnel.js b/devtools/client/responsive.html/browser/tunnel.js index b54e4a15caae..4e08a8d2156d 100644 --- a/devtools/client/responsive.html/browser/tunnel.js +++ b/devtools/client/responsive.html/browser/tunnel.js @@ -458,7 +458,6 @@ MessageManagerTunnel.prototype = { "PageInfo:", // Messages sent from printUtils.js "Printing:", - "PageMetadata:", // Messages sent from viewSourceUtils.js "ViewSource:", ], @@ -480,7 +479,6 @@ MessageManagerTunnel.prototype = { "PageInfo:", // Messages sent to printUtils.js "Printing:", - "PageMetadata:", // Messages sent to viewSourceUtils.js "ViewSource:", ], diff --git a/toolkit/modules/PageMetadata.jsm b/toolkit/modules/PageMetadata.jsm deleted file mode 100644 index c1f809738919..000000000000 --- a/toolkit/modules/PageMetadata.jsm +++ /dev/null @@ -1,295 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -"use strict"; - -var EXPORTED_SYMBOLS = ["PageMetadata"]; - -ChromeUtils.import("resource://gre/modules/Services.jsm"); -ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm"); -ChromeUtils.import("resource://gre/modules/microformat-shiv.js"); - -XPCOMUtils.defineLazyServiceGetter(this, "UnescapeService", - "@mozilla.org/feed-unescapehtml;1", - "nsIScriptableUnescapeHTML"); - - -/** - * Maximum number of images to discover in the document, when no preview images - * are explicitly specified by the metadata. - * @type {Number} - */ -const DISCOVER_IMAGES_MAX = 5; - - -/** - * Extract metadata and microformats from a HTML document. - * @type {Object} - */ -var PageMetadata = { - /** - * Get all metadata from an HTML document. This includes: - * - URL - * - title - * - Metadata specified in tags, including OpenGraph data - * - Links specified in tags (short, canonical, preview images, alternative) - * - Content that can be found in the page content that we consider useful metadata - * - Microformats - * - * @param {Document} document - Document to extract data from. - * @param {Element} [target] - Optional element to restrict microformats lookup to. - * @returns {Object} Object containing the various metadata, normalized to - * merge some common alternative names for metadata. - */ - getData(document, target = null) { - let result = { - url: this._validateURL(document, document.documentURI), - title: document.title, - previews: [], - }; - - // if pushState was used to change the url, most likely all meta data is - // invalid. This is the case with several major sites that rely on - // pushState. In that case, we'll only return uri and title. If document is - // via XHR or something, there is no view or history. - if (document.defaultView) { - let docshell = document.defaultView.docShell; - let shentry = {}; - if (docshell.getCurrentSHEntry(shentry) && - shentry.value && shentry.value.URIWasModified) { - return result; - } - } - - this._getMetaData(document, result); - this._getLinkData(document, result); - this._getPageData(document, result); - result.microformats = this.getMicroformats(document, target); - - return result; - }, - - getMicroformats(document, target = null) { - if (target) { - return Microformats.getParent(target, {node: document}); - } - return Microformats.get({node: document}); - }, - - /** - * Get metadata as defined in tags. - * This adds properties to an existing result object. - * - * @param {Document} document - Document to extract data from. - * @param {Object} result - Existing result object to add properties to. - */ - _getMetaData(document, result) { - // Query for standardized meta data. - let elements = document.querySelectorAll("head > meta[property], head > meta[name]"); - if (elements.length < 1) { - return; - } - - for (let element of elements) { - let value = element.getAttribute("content"); - if (!value) { - continue; - } - value = UnescapeService.unescape(value.trim()); - - let key = element.getAttribute("property") || element.getAttribute("name"); - if (!key) { - continue; - } - - // There are a wide array of possible meta tags, expressing articles, - // products, etc. so all meta tags are passed through but we touch up the - // most common attributes. - result[key] = value; - - switch (key) { - case "title": - case "og:title": { - // Only set the title if one hasn't already been obtained (e.g. from the - // document title element). - if (!result.title) { - result.title = value; - } - break; - } - - case "description": - case "og:description": { - result.description = value; - break; - } - - case "og:site_name": { - result.siteName = value; - break; - } - - case "medium": - case "og:type": { - result.medium = value; - break; - } - - case "og:video": { - let url = this._validateURL(document, value); - if (url) { - result.source = url; - } - break; - } - - case "og:url": { - let url = this._validateURL(document, value); - if (url) { - result.url = url; - } - break; - } - - case "og:image": { - let url = this._validateURL(document, value); - if (url) { - result.previews.push(url); - } - break; - } - } - } - }, - - /** - * Get metadata as defined in tags. - * This adds properties to an existing result object. - * - * @param {Document} document - Document to extract data from. - * @param {Object} result - Existing result object to add properties to. - */ - _getLinkData(document, result) { - let elements = document.querySelectorAll("head > link[rel], head > link[id]"); - - for (let element of elements) { - let url = element.getAttribute("href"); - if (!url) { - continue; - } - url = this._validateURL(document, UnescapeService.unescape(url.trim())); - - let key = element.getAttribute("rel") || element.getAttribute("id"); - if (!key) { - continue; - } - - switch (key) { - case "shorturl": - case "shortlink": { - result.shortUrl = url; - break; - } - - case "canonicalurl": - case "canonical": { - result.url = url; - break; - } - - case "image_src": { - result.previews.push(url); - break; - } - - case "alternate": { - // Expressly for oembed support but we're liberal here and will let - // other alternate links through. oembed defines an href, supplied by - // the site, where you can fetch additional meta data about a page. - // We'll let the client fetch the oembed data themselves, but they - // need the data from this link. - if (!result.alternate) { - result.alternate = []; - } - - result.alternate.push({ - type: element.getAttribute("type"), - href: element.getAttribute("href"), - title: element.getAttribute("title"), - }); - } - } - } - }, - - /** - * Scrape thought the page content for additional content that may be used to - * suppliment explicitly defined metadata. This includes: - * - First few images, when no preview image metadata is explicitly defined. - * - * This adds properties to an existing result object. - * - * @param {Document} document - Document to extract data from. - * @param {Object} result - Existing result object to add properties to. - */ - _getPageData(document, result) { - if (result.previews.length < 1) { - result.previews = this._getImageUrls(document); - } - }, - - /** - * Find the first few images in a document, for use as preview images. - * Will return upto DISCOVER_IMAGES_MAX number of images. - * - * @note This is not very clever. It does not (yet) check if any of the - * images may be appropriate as a preview image. - * - * @param {Document} document - Document to extract data from. - * @return {[string]} Array of URLs. - */ - _getImageUrls(document) { - let result = []; - let elements = document.querySelectorAll("img"); - - for (let element of elements) { - let src = element.getAttribute("src"); - if (src) { - result.push(this._validateURL(document, UnescapeService.unescape(src))); - - // We don't want a billion images. - // TODO: Move this magic number to a const. - if (result.length > DISCOVER_IMAGES_MAX) { - break; - } - } - } - - return result; - }, - - /** - * Validate a URL. This involves resolving the URL if it's relative to the - * document location, ensuring it's using an expected scheme, and stripping - * the userPass portion of the URL. - * - * @param {Document} document - Document to use as the root location for a relative URL. - * @param {string} url - URL to validate. - * @return {string} Result URL. - */ - _validateURL(document, url) { - let docURI = Services.io.newURI(document.documentURI); - let uri = Services.io.newURI(docURI.resolve(url)); - - if (!["http", "https"].includes(uri.scheme)) { - return null; - } - - uri = uri.mutate() - .setUserPass("") - .finalize(); - - return uri.spec; - }, -}; diff --git a/toolkit/modules/moz.build b/toolkit/modules/moz.build index 44be08d874ec..dd98a504995a 100644 --- a/toolkit/modules/moz.build +++ b/toolkit/modules/moz.build @@ -222,7 +222,6 @@ EXTRA_JS_MODULES += [ 'ObjectUtils.jsm', 'offlineAppCache.jsm', 'PageMenu.jsm', - 'PageMetadata.jsm', 'PermissionsUtils.jsm', 'PopupNotifications.jsm', 'Preferences.jsm', diff --git a/toolkit/modules/tests/browser/browser.ini b/toolkit/modules/tests/browser/browser.ini index 277283d11aed..85edc7c499d5 100644 --- a/toolkit/modules/tests/browser/browser.ini +++ b/toolkit/modules/tests/browser/browser.ini @@ -1,7 +1,6 @@ [DEFAULT] support-files = dummy_page.html - metadata_*.html file_FinderIframeTest.html file_FinderSample.html file_WebNavigation_page1.html @@ -51,6 +50,5 @@ skip-if = (verify && debug && (os == 'mac')) [browser_WebRequest_ancestors.js] [browser_WebRequest_cookies.js] [browser_WebRequest_filtering.js] -[browser_PageMetadata.js] [browser_PromiseMessage.js] [browser_Troubleshoot.js] diff --git a/toolkit/modules/tests/browser/browser_PageMetadata.js b/toolkit/modules/tests/browser/browser_PageMetadata.js deleted file mode 100644 index 9df7d1e2a888..000000000000 --- a/toolkit/modules/tests/browser/browser_PageMetadata.js +++ /dev/null @@ -1,73 +0,0 @@ -/** - * Tests PageMetadata.jsm, which extracts metadata and microdata from a - * document. - */ - -var {PageMetadata} = ChromeUtils.import("resource://gre/modules/PageMetadata.jsm", {}); - -var rootURL = "http://example.com/browser/toolkit/modules/tests/browser/"; - -function promiseDocument(fileName) { - let url = rootURL + fileName; - - return new Promise((resolve, reject) => { - let xhr = new XMLHttpRequest(); - xhr.onload = () => resolve(xhr.responseXML); - xhr.onerror = () => reject(new Error("Error loading document")); - xhr.open("GET", url); - xhr.responseType = "document"; - xhr.send(); - }); -} - -/** - * Load a simple document. - */ -add_task(async function simpleDoc() { - let fileName = "metadata_simple.html"; - info(`Loading a simple page, ${fileName}`); - - let doc = await promiseDocument(fileName); - Assert.notEqual(doc, null, - "Should have a document to analyse"); - - let data = PageMetadata.getData(doc); - Assert.notEqual(data, null, - "Should have non-null result"); - Assert.equal(data.url, rootURL + fileName, - "Should have expected url property"); - Assert.equal(data.title, "Test Title", - "Should have expected title property"); - Assert.equal(data.description, "A very simple test page", - "Should have expected title property"); -}); - -add_task(async function titlesDoc() { - let fileName = "metadata_titles.html"; - info(`Loading titles page, ${fileName}`); - - let doc = await promiseDocument(fileName); - Assert.notEqual(doc, null, - "Should have a document to analyse"); - - let data = PageMetadata.getData(doc); - Assert.notEqual(data, null, - "Should have non-null result"); - Assert.equal(data.title, "Test Titles", - "Should use the page title, not the open graph title"); -}); - -add_task(async function titlesFallbackDoc() { - let fileName = "metadata_titles_fallback.html"; - info(`Loading titles page, ${fileName}`); - - let doc = await promiseDocument(fileName); - Assert.notEqual(doc, null, - "Should have a document to analyse"); - - let data = PageMetadata.getData(doc); - Assert.notEqual(data, null, - "Should have non-null result"); - Assert.equal(data.title, "Title", - "Should use the open graph title"); -}); diff --git a/toolkit/modules/tests/browser/metadata_simple.html b/toolkit/modules/tests/browser/metadata_simple.html deleted file mode 100644 index 18089e399238..000000000000 --- a/toolkit/modules/tests/browser/metadata_simple.html +++ /dev/null @@ -1,10 +0,0 @@ - - -
-