forked from mirrors/gecko-dev
Bug 1503674 - Remove unused PageMetadata. r=Gijs
Differential Revision: https://phabricator.services.mozilla.com/D14678 --HG-- extra : moz-landing-system : lando
This commit is contained in:
parent
cd5624af54
commit
12331afc41
13 changed files with 0 additions and 481 deletions
|
|
@ -1,33 +0,0 @@
|
|||
/* vim: set ts=2 sw=2 sts=2 et tw=80: */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
"use strict";
|
||||
|
||||
var EXPORTED_SYMBOLS = ["PageMetadataChild"];
|
||||
|
||||
ChromeUtils.import("resource://gre/actors/ActorChild.jsm");
|
||||
|
||||
ChromeUtils.defineModuleGetter(this, "ContextMenuChild",
|
||||
"resource:///modules/ContextMenuChild.jsm");
|
||||
ChromeUtils.defineModuleGetter(this, "PageMetadata",
|
||||
"resource://gre/modules/PageMetadata.jsm");
|
||||
|
||||
/**
 * Child actor that answers the two "PageMetadata:*" requests by running
 * PageMetadata against the current content document and posting the outcome
 * back over the message manager.
 */
class PageMetadataChild extends ActorChild {
  /**
   * Handle an incoming request.
   *
   * "PageMetadata:GetPageData" is answered with
   * "PageMetadata:PageDataResult" and "PageMetadata:GetMicroformats" with
   * "PageMetadata:MicroformatsResult". Any other message is ignored.
   *
   * @param {Object} message - Message received by this actor; only its
   *                           `name` is inspected here, the whole object is
   *                           handed to ContextMenuChild.getTarget().
   */
  receiveMessage(message) {
    const requestName = message.name;
    const wantsPageData = requestName === "PageMetadata:GetPageData";
    const wantsMicroformats = requestName === "PageMetadata:GetMicroformats";

    if (!wantsPageData && !wantsMicroformats) {
      return;
    }

    // Both requests resolve the target element the same way before
    // delegating to PageMetadata.
    const target = ContextMenuChild.getTarget(this.mm, message);

    if (wantsPageData) {
      const pageData = PageMetadata.getData(this.content.document, target);
      this.mm.sendAsyncMessage("PageMetadata:PageDataResult", pageData);
      return;
    }

    const microformats = PageMetadata.getMicroformats(this.content.document, target);
    this.mm.sendAsyncMessage("PageMetadata:MicroformatsResult", microformats);
  }
}
|
||||
|
|
@ -40,7 +40,6 @@ FINAL_TARGET_FILES.actors += [
|
|||
'NetErrorChild.jsm',
|
||||
'OfflineAppsChild.jsm',
|
||||
'PageInfoChild.jsm',
|
||||
'PageMetadataChild.jsm',
|
||||
'PageStyleChild.jsm',
|
||||
'PluginChild.jsm',
|
||||
'SearchTelemetryChild.jsm',
|
||||
|
|
|
|||
|
|
@ -349,8 +349,6 @@ skip-if = os == 'win' # Bug 1384127
|
|||
# DO NOT ADD MORE TESTS HERE. USE A TOPICAL DIRECTORY INSTEAD.
|
||||
[browser_private_no_prompt.js]
|
||||
# DO NOT ADD MORE TESTS HERE. USE A TOPICAL DIRECTORY INSTEAD.
|
||||
[browser_PageMetaData_pushstate.js]
|
||||
# DO NOT ADD MORE TESTS HERE. USE A TOPICAL DIRECTORY INSTEAD.
|
||||
[browser_refreshBlocker.js]
|
||||
support-files =
|
||||
refresh_header.sjs
|
||||
|
|
|
|||
|
|
@ -1,31 +0,0 @@
|
|||
/* Any copyright is dedicated to the Public Domain.
|
||||
* http://creativecommons.org/publicdomain/zero/1.0/
|
||||
*/
|
||||
|
||||
// Checks that PageMetadata.getData() drops page-level metadata (here the
// description) once the document URL has been changed with pushState, while
// still reporting the new URL and the title.
add_task(async function() {
  let rooturi = "https://example.com/browser/toolkit/modules/tests/browser/";
  let startPage = rooturi + "metadata_simple.html";
  await BrowserTestUtils.openNewForegroundTab(gBrowser, startPage);

  // The function below runs in the content task, so it only sees what is
  // passed in through `args`.
  await ContentTask.spawn(gBrowser.selectedBrowser, { rooturi }, async function(args) {
    ChromeUtils.import("resource://gre/modules/PageMetadata.jsm");

    let originalURL = args.rooturi + "metadata_simple.html";
    let pushedURL = args.rooturi + "2.html";

    // Before pushState: the description is expected to be present.
    let before = PageMetadata.getData(content.document);
    Assert.equal(before.url, originalURL, "metadata url is correct");
    Assert.equal(before.title, "Test Title", "metadata title is correct");
    Assert.equal(before.description, "A very simple test page", "description is correct");

    content.history.pushState({}, "2", "2.html");

    // After pushState: the description is expected to be gone.
    let after = PageMetadata.getData(content.document);
    Assert.equal(after.url, pushedURL, "metadata url is correct");
    Assert.equal(after.title, "Test Title", "metadata title is correct");
    Assert.ok(!after.description, "description is undefined");

    Assert.equal(content.document.documentURI, pushedURL,
                 "content.document has correct url");
  });

  is(gBrowser.currentURI.spec, rooturi + "2.html", "gBrowser has correct url");

  gBrowser.removeTab(gBrowser.selectedTab);
});
|
||||
|
|
@ -202,16 +202,6 @@ let ACTORS = {
|
|||
},
|
||||
},
|
||||
|
||||
PageMetadata: {
|
||||
child: {
|
||||
module: "resource:///actors/PageMetadataChild.jsm",
|
||||
messages: [
|
||||
"PageMetadata:GetPageData",
|
||||
"PageMetadata:GetMicroformats",
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
PageStyle: {
|
||||
child: {
|
||||
module: "resource:///actors/PageStyleChild.jsm",
|
||||
|
|
|
|||
|
|
@ -458,7 +458,6 @@ MessageManagerTunnel.prototype = {
|
|||
"PageInfo:",
|
||||
// Messages sent from printUtils.js
|
||||
"Printing:",
|
||||
"PageMetadata:",
|
||||
// Messages sent from viewSourceUtils.js
|
||||
"ViewSource:",
|
||||
],
|
||||
|
|
@ -480,7 +479,6 @@ MessageManagerTunnel.prototype = {
|
|||
"PageInfo:",
|
||||
// Messages sent to printUtils.js
|
||||
"Printing:",
|
||||
"PageMetadata:",
|
||||
// Messages sent to viewSourceUtils.js
|
||||
"ViewSource:",
|
||||
],
|
||||
|
|
|
|||
|
|
@ -1,295 +0,0 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
"use strict";
|
||||
|
||||
var EXPORTED_SYMBOLS = ["PageMetadata"];
|
||||
|
||||
ChromeUtils.import("resource://gre/modules/Services.jsm");
|
||||
ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm");
|
||||
ChromeUtils.import("resource://gre/modules/microformat-shiv.js");
|
||||
|
||||
XPCOMUtils.defineLazyServiceGetter(this, "UnescapeService",
|
||||
"@mozilla.org/feed-unescapehtml;1",
|
||||
"nsIScriptableUnescapeHTML");
|
||||
|
||||
|
||||
/**
|
||||
* Maximum number of images to discover in the document, when no preview images
|
||||
* are explicitly specified by the metadata.
|
||||
* @type {Number}
|
||||
*/
|
||||
const DISCOVER_IMAGES_MAX = 5;
|
||||
|
||||
|
||||
/**
 * Extract metadata and microformats from a HTML document.
 *
 * All URLs placed on a result object by this module go through
 * _validateURL(), so they are either absolute http(s) URLs without a
 * user:password part, or they are omitted.
 * @type {Object}
 */
var PageMetadata = {
  /**
   * Get all metadata from an HTML document. This includes:
   * - URL
   * - title
   * - Metadata specified in <meta> tags, including OpenGraph data
   * - Links specified in <link> tags (short, canonical, preview images, alternative)
   * - Content that can be found in the page content that we consider useful metadata
   * - Microformats
   *
   * @param {Document} document - Document to extract data from.
   * @param {Element} [target] - Optional element to restrict microformats lookup to.
   * @returns {Object} Object containing the various metadata, normalized to
   *                   merge some common alternative names for metadata.
   */
  getData(document, target = null) {
    let result = {
      url: this._validateURL(document, document.documentURI),
      title: document.title,
      previews: [],
    };

    // if pushState was used to change the url, most likely all meta data is
    // invalid. This is the case with several major sites that rely on
    // pushState. In that case, we'll only return uri and title. If document is
    // via XHR or something, there is no view or history.
    if (document.defaultView) {
      let docshell = document.defaultView.docShell;
      let shentry = {};
      if (docshell.getCurrentSHEntry(shentry) &&
          shentry.value && shentry.value.URIWasModified) {
        return result;
      }
    }

    this._getMetaData(document, result);
    this._getLinkData(document, result);
    this._getPageData(document, result);
    result.microformats = this.getMicroformats(document, target);

    return result;
  },

  /**
   * Get the microformats of a document.
   *
   * @param {Document} document - Document to extract data from.
   * @param {Element} [target] - When given, Microformats.getParent() is used
   *                             for this element instead of scanning the
   *                             whole document with Microformats.get().
   * @returns {Object} Whatever the Microformats library returns.
   */
  getMicroformats(document, target = null) {
    if (target) {
      return Microformats.getParent(target, {node: document});
    }
    return Microformats.get({node: document});
  },

  /**
   * Get metadata as defined in <meta> tags.
   * This adds properties to an existing result object.
   *
   * @param {Document} document - Document to extract data from.
   * @param {Object} result - Existing result object to add properties to.
   */
  _getMetaData(document, result) {
    // Properties of the result object that other code in this module relies
    // on having a particular shape (validated URL, document title, arrays).
    // A page must not be able to replace them through the raw pass-through
    // below, e.g. <meta name="previews"> would turn the previews array into a
    // string and make a later push() throw.
    let reservedKeys = ["url", "title", "previews", "alternate"];

    // Query for standardized meta data.
    let elements = document.querySelectorAll("head > meta[property], head > meta[name]");

    for (let element of elements) {
      let value = element.getAttribute("content");
      if (!value) {
        continue;
      }
      value = UnescapeService.unescape(value.trim());

      let key = element.getAttribute("property") || element.getAttribute("name");
      if (!key) {
        continue;
      }

      // There are a wide array of possible meta tags, expressing articles,
      // products, etc. so all meta tags are passed through but we touch up the
      // most common attributes.
      if (!reservedKeys.includes(key)) {
        result[key] = value;
      }

      switch (key) {
        case "title":
        case "og:title": {
          // Only set the title if one hasn't already been obtained (e.g. from the
          // document title element).
          if (!result.title) {
            result.title = value;
          }
          break;
        }

        case "description":
        case "og:description": {
          result.description = value;
          break;
        }

        case "og:site_name": {
          result.siteName = value;
          break;
        }

        case "medium":
        case "og:type": {
          result.medium = value;
          break;
        }

        case "og:video": {
          let url = this._validateURL(document, value);
          if (url) {
            result.source = url;
          }
          break;
        }

        case "og:url": {
          let url = this._validateURL(document, value);
          if (url) {
            result.url = url;
          }
          break;
        }

        case "og:image": {
          let url = this._validateURL(document, value);
          if (url) {
            result.previews.push(url);
          }
          break;
        }
      }
    }
  },

  /**
   * Get metadata as defined in <link> tags.
   * This adds properties to an existing result object.
   *
   * @param {Document} document - Document to extract data from.
   * @param {Object} result - Existing result object to add properties to.
   */
  _getLinkData(document, result) {
    let elements = document.querySelectorAll("head > link[rel], head > link[id]");

    for (let element of elements) {
      let href = element.getAttribute("href");
      if (!href) {
        continue;
      }
      // null when the link does not point at a usable http(s) URL.
      let url = this._validateURL(document, UnescapeService.unescape(href.trim()));

      let key = element.getAttribute("rel") || element.getAttribute("id");
      if (!key) {
        continue;
      }

      switch (key) {
        case "shorturl":
        case "shortlink": {
          if (url) {
            result.shortUrl = url;
          }
          break;
        }

        case "canonicalurl":
        case "canonical": {
          // Never replace the already validated document URL with a link
          // that failed validation.
          if (url) {
            result.url = url;
          }
          break;
        }

        case "image_src": {
          if (url) {
            result.previews.push(url);
          }
          break;
        }

        case "alternate": {
          // Expressly for oembed support but we're liberal here and will let
          // other alternate links through. oembed defines an href, supplied by
          // the site, where you can fetch additional meta data about a page.
          // We'll let the client fetch the oembed data themselves, but they
          // need the data from this link.
          if (!result.alternate) {
            result.alternate = [];
          }

          result.alternate.push({
            type: element.getAttribute("type"),
            href: element.getAttribute("href"),
            title: element.getAttribute("title"),
          });
          break;
        }
      }
    }
  },

  /**
   * Scrape through the page content for additional content that may be used to
   * supplement explicitly defined metadata. This includes:
   * - First few images, when no preview image metadata is explicitly defined.
   *
   * This adds properties to an existing result object.
   *
   * @param {Document} document - Document to extract data from.
   * @param {Object} result - Existing result object to add properties to.
   */
  _getPageData(document, result) {
    if (result.previews.length < 1) {
      result.previews = this._getImageUrls(document);
    }
  },

  /**
   * Find the first few images in a document, for use as preview images.
   * Will return up to DISCOVER_IMAGES_MAX number of images. Images whose
   * source is not a usable http(s) URL are skipped and do not count towards
   * the limit.
   *
   * @note This is not very clever. It does not (yet) check if any of the
   *       images may be appropriate as a preview image.
   *
   * @param {Document} document - Document to extract data from.
   * @return {[string]} Array of URLs.
   */
  _getImageUrls(document) {
    let result = [];
    let elements = document.querySelectorAll("img");

    for (let element of elements) {
      // We don't want a billion images.
      if (result.length >= DISCOVER_IMAGES_MAX) {
        break;
      }

      let src = element.getAttribute("src");
      if (!src) {
        continue;
      }

      let url = this._validateURL(document, UnescapeService.unescape(src));
      if (url) {
        result.push(url);
      }
    }

    return result;
  },

  /**
   * Validate a URL. This involves resolving the URL if it's relative to the
   * document location, ensuring it's using an expected scheme, and stripping
   * the userPass portion of the URL.
   *
   * @param {Document} document - Document to use as the root location for a relative URL.
   * @param {string} url - URL to validate.
   * @return {?string} Result URL, or null when the URL cannot be parsed or
   *                   does not use the http or https scheme.
   */
  _validateURL(document, url) {
    let uri;
    try {
      let docURI = Services.io.newURI(document.documentURI);
      uri = Services.io.newURI(docURI.resolve(url));
    } catch (ex) {
      // The value comes from page markup; an unparsable URL is treated like
      // any other unusable URL instead of aborting the whole extraction.
      return null;
    }

    if (!["http", "https"].includes(uri.scheme)) {
      return null;
    }

    uri = uri.mutate()
             .setUserPass("")
             .finalize();

    return uri.spec;
  },
};
|
||||
|
|
@ -222,7 +222,6 @@ EXTRA_JS_MODULES += [
|
|||
'ObjectUtils.jsm',
|
||||
'offlineAppCache.jsm',
|
||||
'PageMenu.jsm',
|
||||
'PageMetadata.jsm',
|
||||
'PermissionsUtils.jsm',
|
||||
'PopupNotifications.jsm',
|
||||
'Preferences.jsm',
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
[DEFAULT]
|
||||
support-files =
|
||||
dummy_page.html
|
||||
metadata_*.html
|
||||
file_FinderIframeTest.html
|
||||
file_FinderSample.html
|
||||
file_WebNavigation_page1.html
|
||||
|
|
@ -51,6 +50,5 @@ skip-if = (verify && debug && (os == 'mac'))
|
|||
[browser_WebRequest_ancestors.js]
|
||||
[browser_WebRequest_cookies.js]
|
||||
[browser_WebRequest_filtering.js]
|
||||
[browser_PageMetadata.js]
|
||||
[browser_PromiseMessage.js]
|
||||
[browser_Troubleshoot.js]
|
||||
|
|
|
|||
|
|
@ -1,73 +0,0 @@
|
|||
/**
|
||||
* Tests PageMetadata.jsm, which extracts metadata and microdata from a
|
||||
* document.
|
||||
*/
|
||||
|
||||
var {PageMetadata} = ChromeUtils.import("resource://gre/modules/PageMetadata.jsm", {});
|
||||
|
||||
var rootURL = "http://example.com/browser/toolkit/modules/tests/browser/";
|
||||
|
||||
/**
 * Fetch one of the test pages with XMLHttpRequest and hand back the parsed
 * document.
 *
 * @param {string} fileName - File name appended to rootURL.
 * @returns {Promise} Resolves with the request's responseXML once the load
 *                    event fires; rejects with an Error on the error event.
 */
function promiseDocument(fileName) {
  const target = rootURL + fileName;

  return new Promise((onLoaded, onFailed) => {
    const request = new XMLHttpRequest();

    request.onload = () => {
      onLoaded(request.responseXML);
    };
    request.onerror = () => {
      onFailed(new Error("Error loading document"));
    };

    request.open("GET", target);
    // Ask for a parsed document rather than text.
    request.responseType = "document";
    request.send();
  });
}
|
||||
|
||||
/**
 * Load a simple document and check that the url, title and description
 * reported by PageMetadata.getData() match the page.
 */
add_task(async function simpleDoc() {
  let fileName = "metadata_simple.html";
  info(`Loading a simple page, ${fileName}`);

  let doc = await promiseDocument(fileName);
  Assert.notEqual(doc, null,
                  "Should have a document to analyse");

  let data = PageMetadata.getData(doc);
  Assert.notEqual(data, null,
                  "Should have non-null result");
  Assert.equal(data.url, rootURL + fileName,
               "Should have expected url property");
  Assert.equal(data.title, "Test Title",
               "Should have expected title property");
  // The message used to say "title", which made a failure here point at the
  // wrong property.
  Assert.equal(data.description, "A very simple test page",
               "Should have expected description property");
});
|
||||
|
||||
/**
 * A page with both a <title> and an og:title: the <title> is expected to win.
 */
add_task(async function titlesDoc() {
  const fileName = "metadata_titles.html";
  info(`Loading titles page, ${fileName}`);

  const loadedDoc = await promiseDocument(fileName);
  Assert.notEqual(loadedDoc, null, "Should have a document to analyse");

  const metadata = PageMetadata.getData(loadedDoc);
  Assert.notEqual(metadata, null, "Should have non-null result");
  Assert.equal(metadata.title, "Test Titles",
               "Should use the page title, not the open graph title");
});
|
||||
|
||||
/**
 * A page with an og:title but no <title>: the og:title is expected to be
 * used as the title.
 */
add_task(async function titlesFallbackDoc() {
  const fileName = "metadata_titles_fallback.html";
  info(`Loading titles page, ${fileName}`);

  const loadedDoc = await promiseDocument(fileName);
  Assert.notEqual(loadedDoc, null, "Should have a document to analyse");

  const metadata = PageMetadata.getData(loadedDoc);
  Assert.notEqual(metadata, null, "Should have non-null result");
  Assert.equal(metadata.title, "Title", "Should use the open graph title");
});
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Test Title</title>
|
||||
<meta property="description" content="A very simple test page">
|
||||
</head>
|
||||
<body>
|
||||
Llama.
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Test Titles</title>
|
||||
<meta property="description" content="A very simple test page" />
|
||||
<meta property="og:title" content="Title" />
|
||||
</head>
|
||||
<body>
|
||||
Llama.
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta property="description" content="A very simple test page" />
|
||||
<meta property="og:title" content="Title" />
|
||||
</head>
|
||||
<body>
|
||||
Llama.
|
||||
</body>
|
||||
</html>
|
||||
Loading…
Reference in a new issue