mirror of
				https://github.com/mozilla/gecko-dev.git
				synced 2025-11-04 10:18:41 +02:00 
			
		
		
		
	
		
			
				
	
	
		
			251 lines
		
	
	
	
		
			6.6 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			251 lines
		
	
	
	
		
			6.6 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
/* This Source Code Form is subject to the terms of the Mozilla Public
 | 
						|
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 | 
						|
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 | 
						|
 | 
						|
const lazy = {};
 | 
						|
 | 
						|
ChromeUtils.defineESModuleGetters(lazy, {
 | 
						|
  JsonSchemaValidator:
 | 
						|
    "resource://gre/modules/components-utils/JsonSchemaValidator.sys.mjs",
 | 
						|
  OpenGraphPageData: "resource:///modules/pagedata/OpenGraphPageData.sys.mjs",
 | 
						|
  SchemaOrgPageData: "resource:///modules/pagedata/SchemaOrgPageData.sys.mjs",
 | 
						|
  TwitterPageData: "resource:///modules/pagedata/TwitterPageData.sys.mjs",
 | 
						|
});
 | 
						|
 | 
						|
// Registers `lazy.logConsole`, the console instance used for all logging in
// this module. Messages are prefixed with "PageData"; the maximum log level is
// "Debug" when the `browser.pagedata.log` boolean pref is true and "Warn"
// otherwise (the pref defaults to false when unset).
ChromeUtils.defineLazyGetter(lazy, "logConsole", () => {
  const debugEnabled = Services.prefs.getBoolPref("browser.pagedata.log", false);
  const maxLogLevel = debugEnabled ? "Debug" : "Warn";

  return console.createInstance({ prefix: "PageData", maxLogLevel });
});
 | 
						|
 | 
						|
/**
 * The ordered list of page data collectors. Order expresses specificity: when
 * two collectors provide the same piece of data, the one listed earlier wins.
 *
 * Each collector must expose a `collect` function. It is passed the document
 * object and returns a PageData structure; it may be asynchronous.
 *
 * Collectors are not required to return valid data. They return whatever they
 * can find, and anything invalid is dropped after all results are joined.
 */
ChromeUtils.defineLazyGetter(lazy, "DATA_COLLECTORS", () => [
  lazy.SchemaOrgPageData,
  lazy.OpenGraphPageData,
  lazy.TwitterPageData,
]);
 | 
						|
 | 
						|
// Cache of already-loaded JSON schemas, keyed by schema name. The binding is
// never reassigned; only the Map's contents change.
const SCHEMAS = new Map();
 | 
						|
 | 
						|
/**
 * Loads the schema for the given name, fetching it from the bundled
 * `chrome://browser/content/pagedata/schemas/` directory on first use and
 * serving it from the in-memory cache afterwards.
 *
 * @param {string} schemaName
 *   The name of the schema to load. Matching is case-insensitive.
 * @returns {Promise<object>}
 *   Resolves to the parsed JSON schema.
 * @throws {Error}
 *   If the schema file could not be fetched.
 */
async function loadSchema(schemaName) {
  // Schema file names are plain ASCII, so use the locale-independent
  // toLowerCase(). toLocaleLowerCase() follows the default locale and in
  // Turkish/Azeri maps "I" to dotless "ı" (e.g. "ARTICLE" -> "artıcle"),
  // which would produce a URL that does not exist.
  const key = schemaName.toLowerCase();

  if (SCHEMAS.has(key)) {
    return SCHEMAS.get(key);
  }

  const url = `chrome://browser/content/pagedata/schemas/${key}.schema.json`;
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to load schema: ${response.statusText}`);
  }

  const schema = await response.json();
  // Cache under the normalized key so differently-cased requests share an
  // entry instead of re-fetching the same file.
  SCHEMAS.set(key, schema);
  return schema;
}
 | 
						|
 | 
						|
/**
 * Validates the data using the schema with the given name.
 *
 * @param {string} schemaName
 *   The name of the schema to validate against. It is lower-cased before the
 *   schema is loaded.
 * @param {object} data
 *   The data to validate.
 * @returns {Promise<void>}
 *   Resolves with no value when the data is valid.
 * @throws
 *   The validator's `error` value when the data does not match the schema, or
 *   whatever `loadSchema` throws if the schema cannot be loaded.
 */
async function validateData(schemaName, data) {
  // Use the locale-independent toLowerCase(): toLocaleLowerCase() would turn
  // names such as "ARTICLE" into "artıcle" under a Turkish/Azeri locale and
  // the schema lookup would then fail.
  const schema = await loadSchema(schemaName.toLowerCase());

  const result = lazy.JsonSchemaValidator.validate(data, schema, {
    allowExplicitUndefinedProperties: true,
    // Allowed for future expansion of the schema.
    allowAdditionalProperties: true,
  });

  if (!result.valid) {
    throw result.error;
  }
}
 | 
						|
 | 
						|
/**
 * A shared API that can be used in parent or child processes.
 */
export const PageDataSchema = {
  // Enumeration of data types. The keys must match the schema name.
  DATA_TYPE: Object.freeze({
    // Note that 1 and 2 were used as types in earlier versions and should not be used here.
    PRODUCT: 3,
    DOCUMENT: 4,
    ARTICLE: 5,
    AUDIO: 6,
    VIDEO: 7,
  }),

  /**
   * Gets the data type name.
   *
   * @param {DATA_TYPE} type
   *   The data type from the DATA_TYPE enumeration. A numeric string such as
   *   "3" is accepted as well as the number itself.
   *
   * @returns {string | null} The name for the type or null if not found.
   */
  nameForType(type) {
    for (let [name, value] of Object.entries(this.DATA_TYPE)) {
      // Loose equality is deliberate: validatePageData passes the keys of the
      // data map, which are strings ("3"), while the enumeration holds numbers.
      if (value == type) {
        return name;
      }
    }

    return null;
  },

  /**
   * Asynchronously validates some page data against the expected schema. Throws
   * an exception if validation fails.
   *
   * @param {DATA_TYPE} type
   *   The data type from the DATA_TYPE enumeration
   * @param {object} data
   *   The page data
   * @throws {Error}
   *   If the type is not part of the DATA_TYPE enumeration, or the data does
   *   not validate against the schema for that type.
   */
  async validateData(type, data) {
    let name = this.nameForType(type);

    if (!name) {
      throw new Error(`Unknown data type ${type}`);
    }

    // This is the module-level validateData function, not this method.
    await validateData(name, data);
  },

  /**
   * Asynchronously validates an entire PageData structure. Any invalid or
   * unknown data types are dropped. Throws if the general (non type-specific)
   * part of the structure is invalid.
   *
   * @param {PageData} pageData
   *   The page data
   *
   * @returns {Promise<PageData>} The validated page data structure
   */
  async validatePageData(pageData) {
    let { data: dataMap = {}, ...general } = pageData;

    // An invalid general section invalidates the whole structure.
    await validateData("general", general);

    let validData = {};

    for (let [type, data] of Object.entries(dataMap)) {
      let name = this.nameForType(type);
      // Ignore unknown types here.
      if (!name) {
        continue;
      }

      try {
        await validateData(name, data);

        validData[type] = data;
      } catch (e) {
        // Invalid data is dropped, but leave a trace for debugging.
        lazy.logConsole.debug(`Dropping invalid ${name} page data`, e);
      }
    }

    return {
      ...general,
      data: validData,
    };
  },

  /**
   * Adds new page data into an existing data set. Any existing data is not
   * overwritten. Neither argument is modified.
   *
   * @param {PageData} existingPageData
   *   The existing page data
   * @param {PageData} newPageData
   *   The new page data
   *
   * @returns {PageData} The joined data.
   */
  coalescePageData(existingPageData, newPageData) {
    // Split out the general data from the map of specific data.
    let { data: existingMap = {}, ...existingGeneral } = existingPageData;
    let { data: newMap = {}, ...newGeneral } = newPageData;

    let dataMap = {};

    // Start from the existing types, filling in only the properties that the
    // existing data lacks.
    for (let [type, data] of Object.entries(existingMap)) {
      dataMap[type] =
        type in newMap ? Object.assign({}, newMap[type], data) : data;
    }

    // Then add types that only the new data provides.
    for (let [type, data] of Object.entries(newMap)) {
      if (!(type in dataMap)) {
        dataMap[type] = data;
      }
    }

    return {
      // Later spreads win, so existing general values take precedence.
      ...newGeneral,
      ...existingGeneral,
      data: dataMap,
    };
  },

  /**
   * Collects page data from a DOM document by running every collector and
   * joining their results, earlier collectors taking precedence.
   *
   * @param {Document} document
   *   The DOM document to collect data from
   *
   * @returns {Promise<PageData | null>} The data collected or null in case of
   *   error.
   */
  async collectPageData(document) {
    lazy.logConsole.debug("Starting collection", document.documentURI);

    let pending = lazy.DATA_COLLECTORS.map(async collector => {
      try {
        return await collector.collect(document);
      } catch (e) {
        lazy.logConsole.error("Error collecting page data", e);
        return null;
      }
    });

    let pageDataList = await Promise.all(pending);

    let pageData = pageDataList
      // A failed collector yields null. Skip those so that one broken
      // collector does not make coalescePageData throw and abort everything.
      .filter(collected => collected !== null && collected !== undefined)
      .reduce(
        (merged, collected) =>
          PageDataSchema.coalescePageData(merged, collected),
        {
          date: Date.now(),
          url: document.documentURI,
        }
      );

    try {
      // The await is required: without it a rejection escapes this try block
      // and the caller sees a rejected promise rather than null.
      return await this.validatePageData(pageData);
    } catch (e) {
      lazy.logConsole.error("Failed to collect valid page data", e);
      return null;
    }
  },
};
 |