forked from mirrors/gecko-dev
		
	
		
			
				
	
	
		
			249 lines
		
	
	
	
		
			6.5 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			249 lines
		
	
	
	
		
			6.5 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| /* This Source Code Form is subject to the terms of the Mozilla Public
 | |
|  * License, v. 2.0. If a copy of the MPL was not distributed with this
 | |
|  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 | |
| 
 | |
| const lazy = {};
 | |
| 
 | |
| ChromeUtils.defineESModuleGetters(lazy, {
 | |
|   JsonSchemaValidator:
 | |
|     "resource://gre/modules/components-utils/JsonSchemaValidator.sys.mjs",
 | |
|   OpenGraphPageData: "resource:///modules/pagedata/OpenGraphPageData.sys.mjs",
 | |
|   SchemaOrgPageData: "resource:///modules/pagedata/SchemaOrgPageData.sys.mjs",
 | |
|   TwitterPageData: "resource:///modules/pagedata/TwitterPageData.sys.mjs",
 | |
| });
 | |
| 
 | |
// Lazily creates the console instance used for all PageData logging.
ChromeUtils.defineLazyGetter(lazy, "logConsole", () => {
  // Debug-level output is opt-in through the browser.pagedata.log pref;
  // otherwise only warnings and errors are reported.
  const verbose = Services.prefs.getBoolPref("browser.pagedata.log", false);

  return console.createInstance({
    prefix: "PageData",
    maxLogLevel: verbose ? "Debug" : "Warn",
  });
});
 | |
| 
 | |
/**
 * The ordered list of page data collectors. Order expresses specificity: when
 * two collectors provide the same piece of data, the one listed first wins.
 *
 * Each collector exposes a `collect` function. It receives the document object
 * and returns a PageData structure; it may be asynchronous.
 *
 * Collectors are not required to return valid data. They return whatever they
 * can find, and invalid parts are dropped after all results have been joined.
 */
ChromeUtils.defineLazyGetter(lazy, "DATA_COLLECTORS", () => [
  lazy.SchemaOrgPageData,
  lazy.OpenGraphPageData,
  lazy.TwitterPageData,
]);
 | |
| 
 | |
// Cache of already loaded schemas, keyed by the lower-cased schema name.
const SCHEMAS = new Map();

/**
 * Loads the schema for the given name, fetching it on first use and serving
 * it from the in-memory cache afterwards.
 *
 * @param {string} schemaName
 *   The name of the schema to load. Matching is case-insensitive.
 *
 * @returns {Promise<object>} The parsed JSON schema.
 * @throws {Error} If the schema file could not be fetched.
 */
async function loadSchema(schemaName) {
  // Use the locale-independent toLowerCase(): toLocaleLowerCase() follows the
  // host locale and would, e.g. in Turkish, map "I" to a dotless "ı" and so
  // produce a file name that does not exist.
  let key = schemaName.toLowerCase();

  if (SCHEMAS.has(key)) {
    return SCHEMAS.get(key);
  }

  let url = `chrome://browser/content/pagedata/schemas/${key}.schema.json`;
  let response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to load schema: ${response.statusText}`);
  }

  let schema = await response.json();
  SCHEMAS.set(key, schema);
  return schema;
}
 | |
| 
 | |
/**
 * Validates the data using the schema with the given name.
 *
 * @param {string} schemaName
 *   The name of the schema to validate against. Matching is case-insensitive.
 * @param {object} data
 *   The data to validate.
 *
 * @returns {Promise<void>} Resolves when the data is valid.
 * @throws The validator's error when the data does not match the schema, or
 *   whatever `loadSchema` throws when the schema cannot be loaded.
 */
async function validateData(schemaName, data) {
  // Locale-independent lower-casing: toLocaleLowerCase() depends on the host
  // locale (e.g. Turkish maps "I" to a dotless "ı") and would yield a schema
  // name that does not exist.
  let schema = await loadSchema(schemaName.toLowerCase());

  let result = lazy.JsonSchemaValidator.validate(data, schema, {
    allowExplicitUndefinedProperties: true,
    // Allowed for future expansion of the schema.
    allowAdditionalProperties: true,
  });

  if (!result.valid) {
    throw result.error;
  }
}
 | |
| 
 | |
/**
 * A shared API that can be used in parent or child processes.
 */
export const PageDataSchema = {
  // Enumeration of data types. The keys must match the schema name.
  DATA_TYPE: Object.freeze({
    // Note that 1 and 2 were used as types in earlier versions and should not be used here.
    PRODUCT: 3,
    DOCUMENT: 4,
    ARTICLE: 5,
    AUDIO: 6,
    VIDEO: 7,
  }),

  /**
   * Gets the data type name.
   *
   * @param {DATA_TYPE} type
   *   The data type from the DATA_TYPE enumeration
   *
   * @returns {string | null} The name for the type or null if not found.
   */
  nameForType(type) {
    for (let [name, value] of Object.entries(this.DATA_TYPE)) {
      // Loose equality is deliberate: types used as object keys (see
      // validatePageData) arrive here as strings such as "3".
      if (value == type) {
        return name;
      }
    }

    return null;
  },

  /**
   * Asynchronously validates some page data against the expected schema. Throws
   * an exception if validation fails.
   *
   * @param {DATA_TYPE} type
   *   The data type from the DATA_TYPE enumeration
   * @param {object} data
   *   The page data
   *
   * @returns {Promise<void>} Resolves when the data is valid.
   * @throws {Error} If the type is not part of the DATA_TYPE enumeration.
   */
  async validateData(type, data) {
    let name = this.nameForType(type);

    if (!name) {
      throw new Error(`Unknown data type ${type}`);
    }

    // Calls the module-level validateData function, not this method.
    return validateData(name, data);
  },

  /**
   * Asynchronously validates an entire PageData structure. Any invalid or
   * unknown data types are dropped. Rejects if the general (non type-specific)
   * part of the structure is invalid.
   *
   * @param {PageData} pageData
   *   The page data
   *
   * @returns {Promise<PageData>} The validated page data structure
   */
  async validatePageData(pageData) {
    let { data: dataMap = {}, ...general } = pageData;

    await validateData("general", general);

    let validData = {};

    for (let [type, data] of Object.entries(dataMap)) {
      let name = this.nameForType(type);
      // Ignore unknown types here.
      if (!name) {
        continue;
      }

      try {
        await validateData(name, data);

        validData[type] = data;
      } catch (e) {
        // Invalid data is dropped, but leave a trace for debugging.
        lazy.logConsole.debug(`Dropping invalid ${name} data`, e);
      }
    }

    return {
      ...general,
      data: validData,
    };
  },

  /**
   * Adds new page data into an existing data set. Any existing data is not
   * overwritten.
   *
   * @param {PageData} existingPageData
   *   The existing page data
   * @param {PageData} newPageData
   *   The new page data
   *
   * @returns {PageData} The joined data.
   */
  coalescePageData(existingPageData, newPageData) {
    // Split out the general data from the map of specific data.
    let { data: existingMap = {}, ...existingGeneral } = existingPageData;
    let { data: newMap = {}, ...newGeneral } = newPageData;

    let dataMap = {};
    for (let [type, data] of Object.entries(existingMap)) {
      // For a type present in both sets, merge property by property with the
      // existing values taking precedence.
      dataMap[type] =
        type in newMap ? Object.assign({}, newMap[type], data) : data;
    }

    for (let [type, data] of Object.entries(newMap)) {
      if (!(type in dataMap)) {
        dataMap[type] = data;
      }
    }

    return {
      // Later spreads win, so existing general properties override new ones.
      ...newGeneral,
      ...existingGeneral,
      data: dataMap,
    };
  },

  /**
   * Collects page data from a DOM document.
   *
   * @param {Document} document
   *   The DOM document to collect data from
   *
   * @returns {Promise<PageData | null>} The data collected or null in case of
   *   error.
   */
  async collectPageData(document) {
    lazy.logConsole.debug("Starting collection", document.documentURI);

    let pending = lazy.DATA_COLLECTORS.map(async collector => {
      try {
        return await collector.collect(document);
      } catch (e) {
        lazy.logConsole.error("Error collecting page data", e);
        return null;
      }
    });

    // Collectors that failed resolve to null. Skip those so that one broken
    // collector does not prevent the others' data from being joined.
    let pageDataList = (await Promise.all(pending)).filter(
      collected => collected != null
    );

    // Joined in collector order, so data from earlier collectors wins.
    let pageData = pageDataList.reduce(
      (joined, collected) => this.coalescePageData(joined, collected),
      {
        date: Date.now(),
        url: document.documentURI,
      }
    );

    try {
      // The await is required: without it a rejection would bypass the catch
      // below and reach the caller instead of yielding null.
      return await this.validatePageData(pageData);
    } catch (e) {
      lazy.logConsole.error("Failed to collect valid page data", e);
      return null;
    }
  },
};
 | 
