mirror of
				https://github.com/mozilla/gecko-dev.git
				synced 2025-11-04 02:09:05 +02:00 
			
		
		
		
	
		
			
				
	
	
		
			1613 lines
		
	
	
	
		
			51 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			1613 lines
		
	
	
	
		
			51 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
/* This Source Code Form is subject to the terms of the Mozilla Public
 | 
						|
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 | 
						|
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 | 
						|
 | 
						|
import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
 | 
						|
 | 
						|
const lazy = {};
 | 
						|
 | 
						|
ChromeUtils.defineESModuleGetters(lazy, {
 | 
						|
  clearTimeout: "resource://gre/modules/Timer.sys.mjs",
 | 
						|
  setTimeout: "resource://gre/modules/Timer.sys.mjs",
 | 
						|
});
 | 
						|
 | 
						|
XPCOMUtils.defineLazyPreferenceGetter(
 | 
						|
  lazy,
 | 
						|
  "serpEventTelemetryCategorization",
 | 
						|
  "browser.search.serpEventTelemetryCategorization.enabled",
 | 
						|
  false
 | 
						|
);
 | 
						|
 | 
						|
export const CATEGORIZATION_SETTINGS = {
 | 
						|
  MAX_DOMAINS_TO_CATEGORIZE: 10,
 | 
						|
};
 | 
						|
 | 
						|
// Duplicated from SearchSERPTelemetry to avoid loading the module on content
 | 
						|
// startup.
 | 
						|
const SEARCH_TELEMETRY_SHARED = {
 | 
						|
  PROVIDER_INFO: "SearchTelemetry:ProviderInfo",
 | 
						|
  LOAD_TIMEOUT: "SearchTelemetry:LoadTimeout",
 | 
						|
  SPA_LOAD_TIMEOUT: "SearchTelemetry:SPALoadTimeout",
 | 
						|
};
 | 
						|
 | 
						|
/**
 | 
						|
 * Standard events mapped to the telemetry action.
 | 
						|
 */
 | 
						|
const EVENT_TYPE_TO_ACTION = {
 | 
						|
  click: "clicked",
 | 
						|
};
 | 
						|
 | 
						|
/**
 | 
						|
 * A map of object conditions mapped to the condition that should be run when
 | 
						|
 * an event is triggered. The condition name is referenced in Remote Settings
 | 
						|
 * under the optional `condition` string for an event listener.
 | 
						|
 */
 | 
						|
const CONDITIONS = {
 | 
						|
  keydownEnter: event => event.key == "Enter",
 | 
						|
};
 | 
						|
 | 
						|
/**
 | 
						|
 * SearchProviders looks after keeping track of the search provider information
 | 
						|
 * received from the main process.
 | 
						|
 *
 | 
						|
 * It is separate to SearchTelemetryChild so that it is not constructed for each
 | 
						|
 * tab, but once per process.
 | 
						|
 */
 | 
						|
class SearchProviders {
 | 
						|
  constructor() {
 | 
						|
    this._searchProviderInfo = null;
 | 
						|
    Services.cpmm.sharedData.addEventListener("change", this);
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Gets the search provider information for any provider with advert information.
 | 
						|
   * If there is nothing in the cache, it will obtain it from shared data.
 | 
						|
   *
 | 
						|
   * @returns {object} Returns the search provider information.
 | 
						|
   * @see SearchTelemetry.sys.mjs
 | 
						|
   */
 | 
						|
  get info() {
 | 
						|
    if (this._searchProviderInfo) {
 | 
						|
      return this._searchProviderInfo;
 | 
						|
    }
 | 
						|
 | 
						|
    this._searchProviderInfo = Services.cpmm.sharedData.get(
 | 
						|
      SEARCH_TELEMETRY_SHARED.PROVIDER_INFO
 | 
						|
    );
 | 
						|
 | 
						|
    if (!this._searchProviderInfo) {
 | 
						|
      return null;
 | 
						|
    }
 | 
						|
 | 
						|
    this._searchProviderInfo = this._searchProviderInfo
 | 
						|
      // Filter-out non-ad providers so that we're not trying to match against
 | 
						|
      // those unnecessarily.
 | 
						|
      .filter(p => "extraAdServersRegexps" in p)
 | 
						|
      // Pre-build the regular expressions.
 | 
						|
      .map(p => {
 | 
						|
        p.adServerAttributes = p.adServerAttributes ?? [];
 | 
						|
        if (p.shoppingTab?.inspectRegexpInSERP) {
 | 
						|
          p.shoppingTab.regexp = new RegExp(p.shoppingTab.regexp);
 | 
						|
        }
 | 
						|
        return {
 | 
						|
          ...p,
 | 
						|
          searchPageRegexp: new RegExp(p.searchPageRegexp),
 | 
						|
          extraAdServersRegexps: p.extraAdServersRegexps.map(
 | 
						|
            r => new RegExp(r)
 | 
						|
          ),
 | 
						|
        };
 | 
						|
      });
 | 
						|
 | 
						|
    return this._searchProviderInfo;
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Handles events received from sharedData notifications.
 | 
						|
   *
 | 
						|
   * @param {object} event The event details.
 | 
						|
   */
 | 
						|
  handleEvent(event) {
 | 
						|
    switch (event.type) {
 | 
						|
      case "change": {
 | 
						|
        if (event.changedKeys.includes(SEARCH_TELEMETRY_SHARED.PROVIDER_INFO)) {
 | 
						|
          // Just null out the provider information for now, we'll fetch it next
 | 
						|
          // time we need it.
 | 
						|
          this._searchProviderInfo = null;
 | 
						|
        }
 | 
						|
        break;
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * @typedef {object} EventListenerParam
 | 
						|
 * @property {string} eventType
 | 
						|
 *  The type of event the listener should listen for. If the event type is
 | 
						|
 *  is non-standard, it should correspond to a definition in
 | 
						|
 *  CUSTOM_EVENT_TYPE_TO_DATA that will re-map it to a standard type. TODO
 | 
						|
 * @property {string} target
 | 
						|
 *  The type of component that was the source of the event.
 | 
						|
 * @property {string | null} action
 | 
						|
 *  The action that should be reported in telemetry.
 | 
						|
 */
 | 
						|
 | 
						|
/**
 | 
						|
 * Provides a way to add listeners to elements, as well as unload them.
 | 
						|
 */
 | 
						|
class ListenerHelper {
 | 
						|
  /**
 | 
						|
   * Adds each event listener in an array of event listeners to each element
 | 
						|
   * in an array of elements, and sets their unloading.
 | 
						|
   *
 | 
						|
   * @param {Array<Element>} elements
 | 
						|
   *  DOM elements to add event listeners to.
 | 
						|
   * @param {Array<EventListenerParam>} eventListenerParams
 | 
						|
   *  The type of event to add the listener to.
 | 
						|
   * @param {string} target
 | 
						|
   */
 | 
						|
  static addListeners(elements, eventListenerParams, target) {
 | 
						|
    if (!elements?.length || !eventListenerParams?.length) {
 | 
						|
      return;
 | 
						|
    }
 | 
						|
 | 
						|
    let document = elements[0].ownerGlobal.document;
 | 
						|
    let callback = documentToEventCallbackMap.get(document);
 | 
						|
    if (!callback) {
 | 
						|
      return;
 | 
						|
    }
 | 
						|
 | 
						|
    // The map might have entries from previous callers, so we must ensure
 | 
						|
    // we don't discard existing event listener callbacks.
 | 
						|
    let removeListenerCallbacks = [];
 | 
						|
    if (documentToRemoveEventListenersMap.has(document)) {
 | 
						|
      removeListenerCallbacks = documentToRemoveEventListenersMap.get(document);
 | 
						|
    }
 | 
						|
 | 
						|
    for (let params of eventListenerParams) {
 | 
						|
      let removeListeners = ListenerHelper.addListener(
 | 
						|
        elements,
 | 
						|
        params,
 | 
						|
        target,
 | 
						|
        callback
 | 
						|
      );
 | 
						|
      removeListenerCallbacks = removeListenerCallbacks.concat(removeListeners);
 | 
						|
    }
 | 
						|
 | 
						|
    documentToRemoveEventListenersMap.set(document, removeListenerCallbacks);
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Add an event listener to each element in an array of elements.
 | 
						|
   *
 | 
						|
   * @param {Array<Element>} elements
 | 
						|
   *  DOM elements to add event listeners to.
 | 
						|
   * @param {EventListenerParam} eventListenerParam
 | 
						|
   * @param {string} target
 | 
						|
   * @param {Function} callback
 | 
						|
   * @returns {Array<function>} Array of remove event listener functions.
 | 
						|
   */
 | 
						|
  static addListener(elements, eventListenerParam, target, callback) {
 | 
						|
    let { action, eventType, target: customTarget } = eventListenerParam;
 | 
						|
 | 
						|
    if (customTarget) {
 | 
						|
      target = customTarget;
 | 
						|
    }
 | 
						|
 | 
						|
    if (!action) {
 | 
						|
      action = EVENT_TYPE_TO_ACTION[eventType];
 | 
						|
      if (!action) {
 | 
						|
        return [];
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    // Some events might have specific conditions we want to check before
 | 
						|
    // registering an engagement event.
 | 
						|
    let eventCallback;
 | 
						|
    if (eventListenerParam.condition) {
 | 
						|
      if (CONDITIONS[eventListenerParam.condition]) {
 | 
						|
        let condition = CONDITIONS[eventListenerParam.condition];
 | 
						|
        eventCallback = async event => {
 | 
						|
          let start = Cu.now();
 | 
						|
          if (condition(event)) {
 | 
						|
            callback({ action, target });
 | 
						|
          }
 | 
						|
          ChromeUtils.addProfilerMarker(
 | 
						|
            "SearchSERPTelemetryChild._eventCallback",
 | 
						|
            start,
 | 
						|
            "Call cached function before callback."
 | 
						|
          );
 | 
						|
        };
 | 
						|
      } else {
 | 
						|
        // If a component included a condition, but it wasn't found it is
 | 
						|
        // due to the fact that it was added in a more recent Firefox version
 | 
						|
        // than what is provided via search-telemetry-v2. Since the version of
 | 
						|
        // Firefox the user is using doesn't include this condition,
 | 
						|
        // we shouldn't add the event.
 | 
						|
        return [];
 | 
						|
      }
 | 
						|
    } else {
 | 
						|
      eventCallback = () => {
 | 
						|
        callback({ action, target });
 | 
						|
      };
 | 
						|
    }
 | 
						|
 | 
						|
    let removeListenerCallbacks = [];
 | 
						|
    for (let element of elements) {
 | 
						|
      element.addEventListener(eventType, eventCallback);
 | 
						|
      removeListenerCallbacks.push(() => {
 | 
						|
        element.removeEventListener(eventType, eventCallback);
 | 
						|
      });
 | 
						|
    }
 | 
						|
    return removeListenerCallbacks;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * Scans SERPs for ad components.
 | 
						|
 */
 | 
						|
class SearchAdImpression {
 | 
						|
  /**
 | 
						|
   * A reference to ad component information that is used if an anchor
 | 
						|
   * element could not be categorized to a specific ad component.
 | 
						|
   *
 | 
						|
   * @type {object}
 | 
						|
   */
 | 
						|
  #defaultComponent = null;
 | 
						|
 | 
						|
  /**
 | 
						|
   * Maps DOM elements to AdData.
 | 
						|
   *
 | 
						|
   * @type {Map<Element, AdData>}
 | 
						|
   *
 | 
						|
   * @typedef AdData
 | 
						|
   * @type {object}
 | 
						|
   * @property {string} type
 | 
						|
   *  The type of ad component.
 | 
						|
   * @property {number} adsLoaded
 | 
						|
   *  The number of ads counted as loaded for the component.
 | 
						|
   * @property {boolean} countChildren
 | 
						|
   *  Whether all the children were counted for the component.
 | 
						|
   */
 | 
						|
  #elementToAdDataMap = new Map();
 | 
						|
 | 
						|
  /**
 | 
						|
   * An array of components to do a top-down search.
 | 
						|
   */
 | 
						|
  #topDownComponents = [];
 | 
						|
 | 
						|
  /**
 | 
						|
   * A reference the providerInfo for this SERP.
 | 
						|
   *
 | 
						|
   * @type {object}
 | 
						|
   */
 | 
						|
  #providerInfo = null;
 | 
						|
 | 
						|
  set providerInfo(providerInfo) {
 | 
						|
    if (this.#providerInfo?.telemetryId == providerInfo.telemetryId) {
 | 
						|
      return;
 | 
						|
    }
 | 
						|
 | 
						|
    this.#providerInfo = providerInfo;
 | 
						|
 | 
						|
    // Reset values.
 | 
						|
    this.#topDownComponents = [];
 | 
						|
 | 
						|
    for (let component of this.#providerInfo.components) {
 | 
						|
      if (component.default) {
 | 
						|
        this.#defaultComponent = component;
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
      if (component.topDown) {
 | 
						|
        this.#topDownComponents.push(component);
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Check if the page has a shopping tab.
 | 
						|
   *
 | 
						|
   * @param {Document} document
 | 
						|
   * @return {boolean}
 | 
						|
   *   Whether the page has a shopping tab. Defaults to false.
 | 
						|
   */
 | 
						|
  hasShoppingTab(document) {
 | 
						|
    if (!this.#providerInfo?.shoppingTab) {
 | 
						|
      return false;
 | 
						|
    }
 | 
						|
 | 
						|
    // If a provider has the inspectRegexpInSERP, we assume there must be an
 | 
						|
    // associated regexp that must be used on any hrefs matched by the elements
 | 
						|
    // found using the selector. If inspectRegexpInSERP is false, then check if
 | 
						|
    // the number of items found using the selector matches exactly one element
 | 
						|
    // to ensure we've used a fine-grained search.
 | 
						|
    let elements = document.querySelectorAll(
 | 
						|
      this.#providerInfo.shoppingTab.selector
 | 
						|
    );
 | 
						|
    if (this.#providerInfo.shoppingTab.inspectRegexpInSERP) {
 | 
						|
      let regexp = this.#providerInfo.shoppingTab.regexp;
 | 
						|
      for (let element of elements) {
 | 
						|
        let href = element.getAttribute("href");
 | 
						|
        if (href && regexp.test(href)) {
 | 
						|
          this.#recordElementData(element, {
 | 
						|
            type: "shopping_tab",
 | 
						|
            count: 1,
 | 
						|
          });
 | 
						|
          return true;
 | 
						|
        }
 | 
						|
      }
 | 
						|
    } else if (elements.length == 1) {
 | 
						|
      this.#recordElementData(elements[0], {
 | 
						|
        type: "shopping_tab",
 | 
						|
        count: 1,
 | 
						|
      });
 | 
						|
      return true;
 | 
						|
    }
 | 
						|
    return false;
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Examine the list of anchors and the document object and find components
 | 
						|
   * on the page.
 | 
						|
   *
 | 
						|
   * With the list of anchors, go through each and find the component it
 | 
						|
   * belongs to and save it in elementToAdDataMap.
 | 
						|
   *
 | 
						|
   * Then, with the document object find components and save the results to
 | 
						|
   * elementToAdDataMap.
 | 
						|
   *
 | 
						|
   * Lastly, combine the results together in a new Map that contains the number
 | 
						|
   * of loaded, visible, and blocked results for the component.
 | 
						|
   *
 | 
						|
   * @param {HTMLCollectionOf<HTMLAnchorElement>} anchors
 | 
						|
   * @param {Document} document
 | 
						|
   *
 | 
						|
   * @returns {Map<string, object>}
 | 
						|
   *  A map where the key is a string containing the type of ad component
 | 
						|
   *  and the value is an object containing the number of adsLoaded,
 | 
						|
   *  adsVisible, and adsHidden within the component.
 | 
						|
   */
 | 
						|
  categorize(anchors, document) {
 | 
						|
    // Used for various functions to make relative URLs absolute.
 | 
						|
    let origin = new URL(document.documentURI).origin;
 | 
						|
 | 
						|
    // Bottom up approach.
 | 
						|
    this.#categorizeAnchors(anchors, origin);
 | 
						|
 | 
						|
    // Top down approach.
 | 
						|
    this.#categorizeDocument(document);
 | 
						|
 | 
						|
    let componentToVisibilityMap = new Map();
 | 
						|
    let hrefToComponentMap = new Map();
 | 
						|
 | 
						|
    let innerWindowHeight = document.ownerGlobal.innerHeight;
 | 
						|
    let scrollY = document.ownerGlobal.scrollY;
 | 
						|
 | 
						|
    // Iterate over the results:
 | 
						|
    // - If it's searchbox add event listeners.
 | 
						|
    // - If it is a non_ads_link, map its href to component type.
 | 
						|
    // - For others, map its component type and check visibility.
 | 
						|
    for (let [element, data] of this.#elementToAdDataMap.entries()) {
 | 
						|
      if (data.type == "incontent_searchbox") {
 | 
						|
        // Bug 1880413: Deprecate hard coding the incontent search box.
 | 
						|
        // If searchbox has child elements, observe those, otherwise
 | 
						|
        // fallback to its parent element.
 | 
						|
        let searchElements = data.childElements.length
 | 
						|
          ? data.childElements
 | 
						|
          : [element];
 | 
						|
        ListenerHelper.addListeners(
 | 
						|
          searchElements,
 | 
						|
          [
 | 
						|
            { eventType: "click", target: data.type },
 | 
						|
            {
 | 
						|
              eventType: "keydown",
 | 
						|
              target: data.type,
 | 
						|
              action: "submitted",
 | 
						|
              condition: "keydownEnter",
 | 
						|
            },
 | 
						|
          ],
 | 
						|
          data.type
 | 
						|
        );
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
      if (data.childElements.length) {
 | 
						|
        for (let child of data.childElements) {
 | 
						|
          let href = this.#extractHref(child, origin);
 | 
						|
          if (href) {
 | 
						|
            hrefToComponentMap.set(href, data.type);
 | 
						|
          }
 | 
						|
        }
 | 
						|
      } else {
 | 
						|
        let href = this.#extractHref(element, origin);
 | 
						|
        if (href) {
 | 
						|
          hrefToComponentMap.set(href, data.type);
 | 
						|
        }
 | 
						|
      }
 | 
						|
 | 
						|
      // If the component is a non_ads_link, skip visibility checks.
 | 
						|
      if (data.type == "non_ads_link") {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
 | 
						|
      // If proxy children were found, check the visibility of all of them
 | 
						|
      // otherwise just check the visiblity of the first child.
 | 
						|
      let childElements;
 | 
						|
      if (data.proxyChildElements.length) {
 | 
						|
        childElements = data.proxyChildElements;
 | 
						|
      } else if (data.childElements.length) {
 | 
						|
        childElements = [data.childElements[0]];
 | 
						|
      }
 | 
						|
 | 
						|
      let count = this.#countVisibleAndHiddenAds(
 | 
						|
        element,
 | 
						|
        data.adsLoaded,
 | 
						|
        childElements,
 | 
						|
        innerWindowHeight,
 | 
						|
        scrollY
 | 
						|
      );
 | 
						|
      if (componentToVisibilityMap.has(data.type)) {
 | 
						|
        let componentInfo = componentToVisibilityMap.get(data.type);
 | 
						|
        componentInfo.adsLoaded += data.adsLoaded;
 | 
						|
        componentInfo.adsVisible += count.adsVisible;
 | 
						|
        componentInfo.adsHidden += count.adsHidden;
 | 
						|
      } else {
 | 
						|
        componentToVisibilityMap.set(data.type, {
 | 
						|
          adsLoaded: data.adsLoaded,
 | 
						|
          adsVisible: count.adsVisible,
 | 
						|
          adsHidden: count.adsHidden,
 | 
						|
        });
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    // Release the DOM elements from the Map.
 | 
						|
    this.#elementToAdDataMap.clear();
 | 
						|
 | 
						|
    return { componentToVisibilityMap, hrefToComponentMap };
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Given an element, find the href that is most likely to make the request if
 | 
						|
   * the element is clicked. If the element contains a specific data attribute
 | 
						|
   * known to contain the url used to make the initial request, use it,
 | 
						|
   * otherwise use its href. Specific character conversions are done to mimic
 | 
						|
   * conversions likely to take place when urls are observed in network
 | 
						|
   * activity.
 | 
						|
   *
 | 
						|
   * @param {Element} element
 | 
						|
   *  The element to inspect.
 | 
						|
   * @param {string} origin
 | 
						|
   *  The origin for relative urls.
 | 
						|
   * @returns {string}
 | 
						|
   *   The href of the element.
 | 
						|
   */
 | 
						|
  #extractHref(element, origin) {
 | 
						|
    let href;
 | 
						|
    // Prioritize the href from a known data attribute value instead of
 | 
						|
    // its href property, as the former is the initial url the page will
 | 
						|
    // navigate to before being re-directed to the href.
 | 
						|
    for (let name of this.#providerInfo.adServerAttributes) {
 | 
						|
      if (
 | 
						|
        element.dataset[name] &&
 | 
						|
        this.#providerInfo.extraAdServersRegexps.some(regexp =>
 | 
						|
          regexp.test(element.dataset[name])
 | 
						|
        )
 | 
						|
      ) {
 | 
						|
        href = element.dataset[name];
 | 
						|
        break;
 | 
						|
      }
 | 
						|
    }
 | 
						|
    // If a data attribute value was not found, fallback to the href.
 | 
						|
    href = href ?? element.getAttribute("href");
 | 
						|
    if (!href) {
 | 
						|
      return "";
 | 
						|
    }
 | 
						|
 | 
						|
    // Avoid extracting or fixing up Javascript URLs.
 | 
						|
    if (href.startsWith("javascript")) {
 | 
						|
      return "";
 | 
						|
    }
 | 
						|
 | 
						|
    // Hrefs can be relative.
 | 
						|
    if (!href.startsWith("https://") && !href.startsWith("http://")) {
 | 
						|
      href = origin + href;
 | 
						|
    }
 | 
						|
    // Per Bug 376844, apostrophes in query params are escaped, and thus, are
 | 
						|
    // percent-encoded by the time they are observed in the network. Even
 | 
						|
    // though it's more comprehensive, we avoid using newURI because its more
 | 
						|
    // expensive and conversions should be the exception.
 | 
						|
    // e.g. /path'?q=Mozilla's -> /path'?q=Mozilla%27s
 | 
						|
    let arr = href.split("?");
 | 
						|
    if (arr.length == 2 && arr[1].includes("'")) {
 | 
						|
      href = arr[0] + "?" + arr[1].replaceAll("'", "%27");
 | 
						|
    }
 | 
						|
    return href;
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Given a list of anchor elements, group them into ad components.
 | 
						|
   *
 | 
						|
   * The first step in the process is to check if the anchor should be
 | 
						|
   * inspected. This is based on whether it contains an href or a
 | 
						|
   * data-attribute values that matches an ad link, or if it contains a
 | 
						|
   * pattern caught by a components included regular expression.
 | 
						|
   *
 | 
						|
   * Determine which component it belongs to and the number of matches for
 | 
						|
   * the component. The heuristic is described in findDataForAnchor.
 | 
						|
   * If there was a result and we haven't seen it before, save it in
 | 
						|
   * elementToAdDataMap.
 | 
						|
   *
 | 
						|
   * @param {HTMLCollectionOf<HTMLAnchorElement>} anchors
 | 
						|
   *  The list of anchors to inspect.
 | 
						|
   * @param {string} origin
 | 
						|
   *  The origin of the document the anchors belong to.
 | 
						|
   */
 | 
						|
  #categorizeAnchors(anchors, origin) {
 | 
						|
    for (let anchor of anchors) {
 | 
						|
      if (this.#shouldInspectAnchor(anchor, origin)) {
 | 
						|
        let result = this.#findDataForAnchor(anchor);
 | 
						|
        if (result) {
 | 
						|
          this.#recordElementData(result.element, {
 | 
						|
            type: result.type,
 | 
						|
            count: result.count,
 | 
						|
            proxyChildElements: result.proxyChildElements,
 | 
						|
            childElements: result.childElements,
 | 
						|
          });
 | 
						|
        }
 | 
						|
        if (result.relatedElements?.length) {
 | 
						|
          // Bug 1880413: Deprecate related elements.
 | 
						|
          // Bottom-up approach with related elements are only used for
 | 
						|
          // non-link elements related to ads, like carousel arrows.
 | 
						|
          ListenerHelper.addListeners(
 | 
						|
            result.relatedElements,
 | 
						|
            [
 | 
						|
              {
 | 
						|
                action: "expanded",
 | 
						|
                eventType: "click",
 | 
						|
              },
 | 
						|
            ],
 | 
						|
            result.type
 | 
						|
          );
 | 
						|
        }
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Find components from the document object. This is mostly relevant for
 | 
						|
   * components that are non-ads and don't have an obvious regular expression
 | 
						|
   * that could match the pattern of the href.
 | 
						|
   *
 | 
						|
   * @param {Document} document
 | 
						|
   */
 | 
						|
  #categorizeDocument(document) {
 | 
						|
    // using the subset of components that are top down,
 | 
						|
    // go through each one.
 | 
						|
    for (let component of this.#topDownComponents) {
 | 
						|
      // Top-down searches must have the topDown attribute.
 | 
						|
      if (!component.topDown) {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
      // Top down searches must include a parent.
 | 
						|
      if (!component.included?.parent) {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
      let parents = document.querySelectorAll(
 | 
						|
        component.included.parent.selector
 | 
						|
      );
 | 
						|
      if (parents.length) {
 | 
						|
        let eventListeners = component.included.parent.eventListeners;
 | 
						|
        if (eventListeners?.length) {
 | 
						|
          ListenerHelper.addListeners(parents, eventListeners, component.type);
 | 
						|
        }
 | 
						|
        for (let parent of parents) {
 | 
						|
          // Bug 1880413: Deprecate related elements.
 | 
						|
          // Top-down related elements are either used for auto-suggested
 | 
						|
          // elements of a searchbox, or elements on a page which we can't
 | 
						|
          // find through a bottom up approach but we want an add a listener,
 | 
						|
          // like carousels with arrows.
 | 
						|
          if (component.included.related?.selector) {
 | 
						|
            let relatedElements = parent.querySelectorAll(
 | 
						|
              component.included.related.selector
 | 
						|
            );
 | 
						|
            if (relatedElements.length) {
 | 
						|
              // For the search box, related elements with event listeners are
 | 
						|
              // auto-suggested terms. For everything else (e.g. carousels)
 | 
						|
              // they are expanded.
 | 
						|
              ListenerHelper.addListeners(
 | 
						|
                relatedElements,
 | 
						|
                [
 | 
						|
                  {
 | 
						|
                    action:
 | 
						|
                      component.type == "incontent_searchbox"
 | 
						|
                        ? "submitted"
 | 
						|
                        : "expanded",
 | 
						|
                    eventType: "click",
 | 
						|
                  },
 | 
						|
                ],
 | 
						|
                component.type
 | 
						|
              );
 | 
						|
            }
 | 
						|
          }
 | 
						|
          if (component.included.children) {
 | 
						|
            for (let child of component.included.children) {
 | 
						|
              let childElements = parent.querySelectorAll(child.selector);
 | 
						|
              if (childElements.length) {
 | 
						|
                if (child.eventListeners) {
 | 
						|
                  childElements = Array.from(childElements);
 | 
						|
                  ListenerHelper.addListeners(
 | 
						|
                    childElements,
 | 
						|
                    child.eventListeners,
 | 
						|
                    child.type ?? component.type
 | 
						|
                  );
 | 
						|
                }
 | 
						|
                if (!child.skipCount) {
 | 
						|
                  this.#recordElementData(parent, {
 | 
						|
                    type: component.type,
 | 
						|
                    childElements: Array.from(childElements),
 | 
						|
                  });
 | 
						|
                }
 | 
						|
              }
 | 
						|
            }
 | 
						|
          } else if (!component.included.parent.skipCount) {
 | 
						|
            this.#recordElementData(parent, {
 | 
						|
              type: component.type,
 | 
						|
            });
 | 
						|
          }
 | 
						|
        }
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Evaluates whether an anchor should be inspected based on matching
 | 
						|
   * regular expressions on either its href or specified data-attribute values.
 | 
						|
   *
 | 
						|
   * @param {HTMLAnchorElement} anchor
 | 
						|
   * @param {string} origin
 | 
						|
   * @returns {boolean}
 | 
						|
   */
 | 
						|
  #shouldInspectAnchor(anchor, origin) {
 | 
						|
    let href = anchor.getAttribute("href");
 | 
						|
    if (!href) {
 | 
						|
      return false;
 | 
						|
    }
 | 
						|
 | 
						|
    // Some hrefs might be relative.
 | 
						|
    if (!href.startsWith("https://") && !href.startsWith("http://")) {
 | 
						|
      href = origin + href;
 | 
						|
    }
 | 
						|
 | 
						|
    let regexps = this.#providerInfo.extraAdServersRegexps;
 | 
						|
    // Anchors can contain ad links in a data-attribute.
 | 
						|
    for (let name of this.#providerInfo.adServerAttributes) {
 | 
						|
      let attributeValue = anchor.dataset[name];
 | 
						|
      if (
 | 
						|
        attributeValue &&
 | 
						|
        regexps.some(regexp => regexp.test(attributeValue))
 | 
						|
      ) {
 | 
						|
        return true;
 | 
						|
      }
 | 
						|
    }
 | 
						|
    // Anchors can contain ad links in a specific href.
 | 
						|
    if (regexps.some(regexp => regexp.test(href))) {
 | 
						|
      return true;
 | 
						|
    }
 | 
						|
    return false;
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Find the component data for an anchor.
 | 
						|
   *
 | 
						|
   * To categorize the anchor, we iterate over the list of possible components
 | 
						|
   * the anchor could be categorized. If the component is default, we skip
 | 
						|
   * checking because the fallback option for all anchor links is the default.
 | 
						|
   *
 | 
						|
   * First, get the "parent" of the anchor which best represents the DOM element
 | 
						|
   * that contains the anchor links for the component and no other component.
 | 
						|
   * This parent will be cached so that other anchors that share the same
 | 
						|
   * parent can be counted together.
 | 
						|
   *
 | 
						|
   * The check for a parent is a loop because we can define more than one best
 | 
						|
   * parent since on certain SERPs, it's possible for a "better" DOM element
 | 
						|
   * parent to appear occassionally.
 | 
						|
   *
 | 
						|
   * If no parent is found, skip this component.
 | 
						|
   *
 | 
						|
   * If a parent was found, check for specific child elements.
 | 
						|
   *
 | 
						|
   * Finding child DOM elements of a parent is optional. One reason to do so is
 | 
						|
   * to use child elements instead of anchor links to count the number of ads for
 | 
						|
   * a component via the `countChildren` property. This is provided because some ads
 | 
						|
   * (i.e. carousels) have multiple ad links in a single child element that go to the
 | 
						|
   * same location. In this scenario, all instances of the child are recorded as ads.
 | 
						|
   * Subsequent anchor elements that map to the same parent are ignored.
 | 
						|
   *
 | 
						|
   * Whether or not a child was found, return the information that was found,
 | 
						|
   * including whether or not all child elements were counted instead of anchors.
 | 
						|
   *
 | 
						|
   * If another anchor belonging to a parent that was previously recorded is the input
 | 
						|
   * for this function, we either increment the ad count by 1 or don't increment the ad
 | 
						|
   * count because the parent used `countChildren` completed the calculation in a
 | 
						|
   * previous step.
 | 
						|
   *
 | 
						|
   *
 | 
						|
   * @param {HTMLAnchorElement} anchor
 | 
						|
   *  The anchor to be inspected.
 | 
						|
   * @returns {object}
 | 
						|
   *  An object containing the element representing the root DOM element for
 | 
						|
   *  the component, the type of component, how many ads were counted,
 | 
						|
   *  and whether or not the count was of all the children.
 | 
						|
   */
 | 
						|
  #findDataForAnchor(anchor) {
 | 
						|
    for (let component of this.#providerInfo.components) {
 | 
						|
      // First, check various conditions for skipping a component.
 | 
						|
 | 
						|
      // A component should always have at least one included statement.
 | 
						|
      if (!component.included) {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
 | 
						|
      // Top down searches are done after the bottom up search.
 | 
						|
      if (component.topDown) {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
 | 
						|
      // The default component doesn't need to be checked,
 | 
						|
      // as it will be the fallback option.
 | 
						|
      if (component.default) {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
 | 
						|
      // The anchor shouldn't belong to an excluded parent component if one
 | 
						|
      // is provided.
 | 
						|
      if (
 | 
						|
        component.excluded?.parent?.selector &&
 | 
						|
        anchor.closest(component.excluded.parent.selector)
 | 
						|
      ) {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
 | 
						|
      // All components with included should have a parent entry.
 | 
						|
      if (!component.included.parent) {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
 | 
						|
      // Find the parent of the anchor.
 | 
						|
      let parent = anchor.closest(component.included.parent.selector);
 | 
						|
 | 
						|
      if (!parent) {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
 | 
						|
      // If we've already inspected the parent, add the child element to the
 | 
						|
      // list of anchors. Don't increment the ads loaded count, as we only care
 | 
						|
      // about grouping the anchor with the correct parent.
 | 
						|
      if (this.#elementToAdDataMap.has(parent)) {
 | 
						|
        return {
 | 
						|
          element: parent,
 | 
						|
          childElements: [anchor],
 | 
						|
        };
 | 
						|
      }
 | 
						|
 | 
						|
      let relatedElements = [];
 | 
						|
      if (component.included.related?.selector) {
 | 
						|
        relatedElements = parent.querySelectorAll(
 | 
						|
          component.included.related.selector
 | 
						|
        );
 | 
						|
      }
 | 
						|
 | 
						|
      // If the component has no defined children, return the parent element.
 | 
						|
      if (component.included.children) {
 | 
						|
        // Look for the first instance of a matching child selector.
 | 
						|
        for (let child of component.included.children) {
 | 
						|
          // If counting by child, get all of them at once.
 | 
						|
          if (child.countChildren) {
 | 
						|
            let proxyChildElements = parent.querySelectorAll(child.selector);
 | 
						|
            if (proxyChildElements.length) {
 | 
						|
              return {
 | 
						|
                element: parent,
 | 
						|
                type: child.type ?? component.type,
 | 
						|
                proxyChildElements: Array.from(proxyChildElements),
 | 
						|
                count: proxyChildElements.length,
 | 
						|
                childElements: [anchor],
 | 
						|
                relatedElements,
 | 
						|
              };
 | 
						|
            }
 | 
						|
          } else if (parent.querySelector(child.selector)) {
 | 
						|
            return {
 | 
						|
              element: parent,
 | 
						|
              type: child.type ?? component.type,
 | 
						|
              childElements: [anchor],
 | 
						|
              relatedElements,
 | 
						|
            };
 | 
						|
          }
 | 
						|
        }
 | 
						|
      }
 | 
						|
      // If no children were defined for this component, or none were found
 | 
						|
      // in the DOM, use the default definition.
 | 
						|
      return {
 | 
						|
        element: parent,
 | 
						|
        type: component.type,
 | 
						|
        childElements: [anchor],
 | 
						|
        relatedElements,
 | 
						|
      };
 | 
						|
    }
 | 
						|
    // If no component was found, use default values.
 | 
						|
    return {
 | 
						|
      element: anchor,
 | 
						|
      type: this.#defaultComponent.type,
 | 
						|
    };
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Determines whether or not an ad was visible or hidden.
 | 
						|
   *
 | 
						|
   * An ad is considered visible if the parent element containing the
 | 
						|
   * component has non-zero dimensions, and all child element in the
 | 
						|
   * component have non-zero dimensions and fits within the window
 | 
						|
   * at the time when the impression was takent.
 | 
						|
   *
 | 
						|
   * For some components, like text ads, we don't send every child
 | 
						|
   * element for visibility, just the first text ad. For other components
 | 
						|
   * like carousels, we send all child elements because we do care about
 | 
						|
   * counting how many elements of the carousel were visible.
 | 
						|
   *
 | 
						|
   * @param {Element} element
 | 
						|
   *  Element to be inspected
 | 
						|
   * @param {number} adsLoaded
 | 
						|
   *  Number of ads initially determined to be loaded for this element.
 | 
						|
   * @param {Array<Element>} childElements
 | 
						|
   *  List of children belonging to element.
 | 
						|
   * @param {number} innerWindowHeight
 | 
						|
   *  Current height of the window containing the elements.
 | 
						|
   * @param {number} scrollY
 | 
						|
   *  Current distance the window has been scrolled.
 | 
						|
   * @returns {object}
 | 
						|
   *  Contains adsVisible which is the number of ads shown for the element
 | 
						|
   *  and adsHidden, the number of ads not visible to the user.
 | 
						|
   */
 | 
						|
  #countVisibleAndHiddenAds(
 | 
						|
    element,
 | 
						|
    adsLoaded,
 | 
						|
    childElements,
 | 
						|
    innerWindowHeight,
 | 
						|
    scrollY
 | 
						|
  ) {
 | 
						|
    let elementRect =
 | 
						|
      element.ownerGlobal.windowUtils.getBoundsWithoutFlushing(element);
 | 
						|
 | 
						|
    // If the element lacks a dimension, assume all ads that
 | 
						|
    // were contained within it are hidden.
 | 
						|
    if (elementRect.width == 0 || elementRect.height == 0) {
 | 
						|
      return {
 | 
						|
        adsVisible: 0,
 | 
						|
        adsHidden: adsLoaded,
 | 
						|
      };
 | 
						|
    }
 | 
						|
 | 
						|
    // If an ad is far above the possible visible area of a window, an
 | 
						|
    // adblocker might be doing it as a workaround for blocking the ad.
 | 
						|
    if (
 | 
						|
      elementRect.bottom < 0 &&
 | 
						|
      innerWindowHeight + scrollY + elementRect.bottom < 0
 | 
						|
    ) {
 | 
						|
      return {
 | 
						|
        adsVisible: 0,
 | 
						|
        adsHidden: adsLoaded,
 | 
						|
      };
 | 
						|
    }
 | 
						|
 | 
						|
    // Since the parent element has dimensions but no child elements we want
 | 
						|
    // to inspect, check the parent itself is within the viewable area.
 | 
						|
    if (!childElements || !childElements.length) {
 | 
						|
      if (innerWindowHeight < elementRect.y + elementRect.height) {
 | 
						|
        return {
 | 
						|
          adsVisible: 0,
 | 
						|
          adsHidden: 0,
 | 
						|
        };
 | 
						|
      }
 | 
						|
      return {
 | 
						|
        adsVisible: 1,
 | 
						|
        adsHidden: 0,
 | 
						|
      };
 | 
						|
    }
 | 
						|
 | 
						|
    let adsVisible = 0;
 | 
						|
    let adsHidden = 0;
 | 
						|
    for (let child of childElements) {
 | 
						|
      let itemRect =
 | 
						|
        child.ownerGlobal.windowUtils.getBoundsWithoutFlushing(child);
 | 
						|
 | 
						|
      // If the child element we're inspecting has no dimension, it is hidden.
 | 
						|
      if (itemRect.height == 0 || itemRect.width == 0) {
 | 
						|
        adsHidden += 1;
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
 | 
						|
      // If the child element is to the left of the containing element, or to
 | 
						|
      // the right of the containing element, skip it.
 | 
						|
      if (
 | 
						|
        itemRect.x < elementRect.x ||
 | 
						|
        itemRect.x + itemRect.width > elementRect.x + elementRect.width
 | 
						|
      ) {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
 | 
						|
      // If the child element is too far down, skip it.
 | 
						|
      if (innerWindowHeight < itemRect.y + itemRect.height) {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
      ++adsVisible;
 | 
						|
    }
 | 
						|
 | 
						|
    return {
 | 
						|
      adsVisible,
 | 
						|
      adsHidden,
 | 
						|
    };
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Caches ad data for a DOM element. The key of the map is by Element rather
 | 
						|
   * than Component for fast lookup on whether an Element has been already been
 | 
						|
   * categorized as a component. Subsequent calls to this passing the same
 | 
						|
   * element will update the list of child elements.
 | 
						|
   *
 | 
						|
   * @param {Element} element
 | 
						|
   *  The element considered to be the root for the component.
 | 
						|
   * @param {object} params
 | 
						|
   *  Various parameters that can be recorded. Whether the input values exist
 | 
						|
   *  or not depends on which component was found, which heuristic should be used
 | 
						|
   *  to determine whether an ad was visible, and whether we've already seen this
 | 
						|
   *  element.
 | 
						|
   * @param {string | null} params.type
 | 
						|
   *  The type of component.
 | 
						|
   * @param {number} params.count
 | 
						|
   *  The number of ads found for a component. The number represents either
 | 
						|
   *  the number of elements that match an ad expression or the number of DOM
 | 
						|
   *  elements containing an ad link.
 | 
						|
   * @param {Array<Element>} params.proxyChildElements
 | 
						|
   *  An array of DOM elements that should be inspected for visibility instead
 | 
						|
   *  of the actual child elements, possibly because they are grouped.
 | 
						|
   * @param {Array<Element>} params.childElements
 | 
						|
   *  An array of DOM elements to inspect.
 | 
						|
   */
 | 
						|
  #recordElementData(
 | 
						|
    element,
 | 
						|
    { type, count = 1, proxyChildElements = [], childElements = [] } = {}
 | 
						|
  ) {
 | 
						|
    if (this.#elementToAdDataMap.has(element)) {
 | 
						|
      let recordedValues = this.#elementToAdDataMap.get(element);
 | 
						|
      if (childElements.length) {
 | 
						|
        recordedValues.childElements =
 | 
						|
          recordedValues.childElements.concat(childElements);
 | 
						|
      }
 | 
						|
    } else {
 | 
						|
      this.#elementToAdDataMap.set(element, {
 | 
						|
        type,
 | 
						|
        adsLoaded: count,
 | 
						|
        proxyChildElements,
 | 
						|
        childElements,
 | 
						|
      });
 | 
						|
    }
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * An object indicating which elements to examine for domains to extract and
 | 
						|
 * which heuristic technique to use to extract that element's domain.
 | 
						|
 *
 | 
						|
 * @typedef {object} ExtractorInfo
 | 
						|
 * @property {string} selectors
 | 
						|
 *  A string representing the CSS selector that targets the elements on the
 | 
						|
 *  page that contain domains we want to extract.
 | 
						|
 * @property {string} method
 | 
						|
 *  A string representing which domain extraction heuristic to use.
 | 
						|
 *  One of: "href", "dataAttribute" or "textContent".
 | 
						|
 * @property {object | null} options
 | 
						|
 *  Options related to the domain extraction heuristic used.
 | 
						|
 * @property {string | null} options.dataAttributeKey
 | 
						|
 *  The key name of the data attribute to lookup.
 | 
						|
 * @property {string | null} options.queryParamKey
 | 
						|
 *  The key name of the query param value to lookup.
 | 
						|
 * @property {boolean | null} options.queryParamValueIsHref
 | 
						|
 *  Whether the query param value is expected to contain an href.
 | 
						|
 */
 | 
						|
 | 
						|
/**
 | 
						|
 * DomainExtractor examines elements on a page to retrieve the domains.
 | 
						|
 */
 | 
						|
class DomainExtractor {
 | 
						|
  /**
 | 
						|
   * Extract domains from the page using an array of information pertaining to
 | 
						|
   * the SERP.
 | 
						|
   *
 | 
						|
   * @param {Document} document
 | 
						|
   *  The document for the SERP we are extracting domains from.
 | 
						|
   * @param {Array<ExtractorInfo>} extractorInfos
 | 
						|
   *  Information used to target the domains we need to extract.
 | 
						|
   * @param {string} providerName
 | 
						|
   *  Name of the search provider.
 | 
						|
   * @return {Set<string>}
 | 
						|
   *  A set of the domains extracted from the page.
 | 
						|
   */
 | 
						|
  extractDomainsFromDocument(document, extractorInfos, providerName) {
 | 
						|
    let extractedDomains = new Set();
 | 
						|
    if (!extractorInfos?.length) {
 | 
						|
      return extractedDomains;
 | 
						|
    }
 | 
						|
 | 
						|
    for (let extractorInfo of extractorInfos) {
 | 
						|
      if (!extractorInfo.selectors) {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
 | 
						|
      let elements = document.querySelectorAll(extractorInfo.selectors);
 | 
						|
      if (!elements) {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
 | 
						|
      switch (extractorInfo.method) {
 | 
						|
        case "href": {
 | 
						|
          // Origin is used in case a URL needs to be made absolute.
 | 
						|
          let origin = new URL(document.documentURI).origin;
 | 
						|
          this.#fromElementsConvertHrefsIntoDomains(
 | 
						|
            elements,
 | 
						|
            origin,
 | 
						|
            providerName,
 | 
						|
            extractedDomains,
 | 
						|
            extractorInfo.options?.queryParamKey,
 | 
						|
            extractorInfo.options?.queryParamValueIsHref
 | 
						|
          );
 | 
						|
          break;
 | 
						|
        }
 | 
						|
        case "dataAttribute": {
 | 
						|
          this.#fromElementsRetrieveDataAttributeValues(
 | 
						|
            elements,
 | 
						|
            providerName,
 | 
						|
            extractorInfo.options?.dataAttributeKey,
 | 
						|
            extractedDomains
 | 
						|
          );
 | 
						|
          break;
 | 
						|
        }
 | 
						|
        case "textContent": {
 | 
						|
          this.#fromElementsRetrieveTextContent(
 | 
						|
            elements,
 | 
						|
            extractedDomains,
 | 
						|
            providerName
 | 
						|
          );
 | 
						|
          break;
 | 
						|
        }
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    return extractedDomains;
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Given a list of elements, extract domains using href attributes. If the
 | 
						|
   * URL in the href includes the specified query param, the domain will be
 | 
						|
   * that query param's value. Otherwise it will be the hostname of the href
 | 
						|
   * attribute's URL.
 | 
						|
   *
 | 
						|
   * @param {NodeList<Element>} elements
 | 
						|
   *  A list of elements from the page whose href attributes we want to
 | 
						|
   *  inspect.
 | 
						|
   * @param {string} origin
 | 
						|
   *  Origin of the current page.
 | 
						|
   * @param {string} providerName
 | 
						|
   *  The name of the search provider.
 | 
						|
   * @param {Set<string>} extractedDomains
 | 
						|
   *  The result set of domains extracted from the page.
 | 
						|
   * @param {string | null} queryParam
 | 
						|
   *  An optional query param to search for in an element's href attribute.
 | 
						|
   * @param {boolean | null} queryParamValueIsHref
 | 
						|
   *  Whether the query param value is expected to contain an href.
 | 
						|
   */
 | 
						|
  #fromElementsConvertHrefsIntoDomains(
 | 
						|
    elements,
 | 
						|
    origin,
 | 
						|
    providerName,
 | 
						|
    extractedDomains,
 | 
						|
    queryParam,
 | 
						|
    queryParamValueIsHref
 | 
						|
  ) {
 | 
						|
    for (let element of elements) {
 | 
						|
      if (this.#exceedsThreshold(extractedDomains.size)) {
 | 
						|
        return;
 | 
						|
      }
 | 
						|
 | 
						|
      let href = element.getAttribute("href");
 | 
						|
 | 
						|
      let url;
 | 
						|
      try {
 | 
						|
        url = new URL(href, origin);
 | 
						|
      } catch (ex) {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
 | 
						|
      // Ignore non-standard protocols.
 | 
						|
      if (url.protocol != "https:" && url.protocol != "http:") {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
 | 
						|
      if (queryParam) {
 | 
						|
        let paramValue = url.searchParams.get(queryParam);
 | 
						|
        if (queryParamValueIsHref) {
 | 
						|
          try {
 | 
						|
            paramValue = new URL(paramValue).hostname;
 | 
						|
          } catch (e) {
 | 
						|
            continue;
 | 
						|
          }
 | 
						|
          paramValue = this.#processDomain(paramValue, providerName);
 | 
						|
        }
 | 
						|
        if (paramValue && !extractedDomains.has(paramValue)) {
 | 
						|
          extractedDomains.add(paramValue);
 | 
						|
        }
 | 
						|
      } else if (url.hostname) {
 | 
						|
        let processedHostname = this.#processDomain(url.hostname, providerName);
 | 
						|
        if (processedHostname && !extractedDomains.has(processedHostname)) {
 | 
						|
          extractedDomains.add(processedHostname);
 | 
						|
        }
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Given a list of elements, examine each for the specified data attribute.
 | 
						|
   * If found, add that data attribute's value to the result set of extracted
 | 
						|
   * domains as is.
 | 
						|
   *
 | 
						|
   * @param {NodeList<Element>} elements
 | 
						|
   *  A list of elements from the page whose data attributes we want to
 | 
						|
   *  inspect.
 | 
						|
   * @param {string} providerName
 | 
						|
   *  The name of the search provider.
 | 
						|
   * @param {string} attribute
 | 
						|
   *  The name of a data attribute to search for within an element.
 | 
						|
   * @param {Set<string>} extractedDomains
 | 
						|
   *  The result set of domains extracted from the page.
 | 
						|
   */
 | 
						|
  #fromElementsRetrieveDataAttributeValues(
 | 
						|
    elements,
 | 
						|
    providerName,
 | 
						|
    attribute,
 | 
						|
    extractedDomains
 | 
						|
  ) {
 | 
						|
    for (let element of elements) {
 | 
						|
      if (this.#exceedsThreshold(extractedDomains.size)) {
 | 
						|
        return;
 | 
						|
      }
 | 
						|
      let value = element.dataset[attribute];
 | 
						|
      value = this.#processDomain(value, providerName);
 | 
						|
      if (value && !extractedDomains.has(value)) {
 | 
						|
        extractedDomains.add(value);
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  /* Given a list of elements, examine the text content for each element, which
 | 
						|
   * may be 1) a URL from which we can extract a domain or 2) text we can fix
 | 
						|
   * up to create a best guess as to a URL. If either condition is met, we add
 | 
						|
   * the domain to the result set.
 | 
						|
   *
 | 
						|
   * @param {NodeList<Element>} elements
 | 
						|
   *  A list of elements from the page whose text content we want to inspect.
 | 
						|
   * @param {Set<string>} extractedDomains
 | 
						|
   *  The result set of domains extracted from the page.
 | 
						|
   * @param {string} providerName
 | 
						|
   *  The name of the search provider.
 | 
						|
   */
 | 
						|
  #fromElementsRetrieveTextContent(elements, extractedDomains, providerName) {
 | 
						|
    // Not an exhaustive regex, but it fits our purpose for this method.
 | 
						|
    const LOOSE_URL_REGEX =
 | 
						|
      /^(?:https?:\/\/)?(?:www\.)?(?:[\w\-]+\.)+(?:[\w\-]{2,})/i;
 | 
						|
 | 
						|
    // Known but acceptable limitations to this function, where the return
 | 
						|
    // value won't be correctly fixed up:
 | 
						|
    //   1) A url is embedded within other text. Ex: "xkcd.com is cool."
 | 
						|
    //   2) The url contains legal but unusual characters. Ex: $ ! * '
 | 
						|
    function fixup(textContent) {
 | 
						|
      return textContent
 | 
						|
        .toLowerCase()
 | 
						|
        .replaceAll(" ", "")
 | 
						|
        .replace(/\.$/, "")
 | 
						|
        .concat(".com");
 | 
						|
    }
 | 
						|
 | 
						|
    for (let element of elements) {
 | 
						|
      if (this.#exceedsThreshold(extractedDomains.size)) {
 | 
						|
        return;
 | 
						|
      }
 | 
						|
      let textContent = element.textContent;
 | 
						|
      if (!textContent) {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
 | 
						|
      let domain;
 | 
						|
      if (LOOSE_URL_REGEX.test(textContent)) {
 | 
						|
        // Creating a new URL object will throw if the protocol is missing.
 | 
						|
        if (!/^https?:\/\//.test(textContent)) {
 | 
						|
          textContent = "https://" + textContent;
 | 
						|
        }
 | 
						|
 | 
						|
        try {
 | 
						|
          domain = new URL(textContent).hostname;
 | 
						|
        } catch (e) {
 | 
						|
          domain = fixup(textContent);
 | 
						|
        }
 | 
						|
      } else {
 | 
						|
        domain = fixup(textContent);
 | 
						|
      }
 | 
						|
 | 
						|
      let processedDomain = this.#processDomain(domain, providerName);
 | 
						|
      if (processedDomain && !extractedDomains.has(processedDomain)) {
 | 
						|
        extractedDomains.add(processedDomain);
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Processes a raw domain extracted from the SERP into its final form before
 | 
						|
   * categorization.
 | 
						|
   *
 | 
						|
   * @param {string} domain
 | 
						|
   *   The domain extracted from the page.
 | 
						|
   * @param {string} providerName
 | 
						|
   *   The provider associated with the page.
 | 
						|
   * @returns {string}
 | 
						|
   *   The domain without any subdomains.
 | 
						|
   */
 | 
						|
  #processDomain(domain, providerName) {
 | 
						|
    if (
 | 
						|
      domain.startsWith(`${providerName}.`) ||
 | 
						|
      domain.includes(`.${providerName}.`)
 | 
						|
    ) {
 | 
						|
      return "";
 | 
						|
    }
 | 
						|
    return this.#stripDomainOfSubdomains(domain);
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Helper to strip domains of any subdomains.
 | 
						|
   *
 | 
						|
   * @param {string} domain
 | 
						|
   *   The domain to strip of any subdomains.
 | 
						|
   * @returns {object} browser
 | 
						|
   *   The given domain with any subdomains removed.
 | 
						|
   */
 | 
						|
  #stripDomainOfSubdomains(domain) {
 | 
						|
    let tld;
 | 
						|
    // Can throw an exception if the input has too few domain levels.
 | 
						|
    try {
 | 
						|
      tld = Services.eTLD.getKnownPublicSuffixFromHost(domain);
 | 
						|
    } catch (ex) {
 | 
						|
      return "";
 | 
						|
    }
 | 
						|
 | 
						|
    let domainWithoutTLD = domain.substring(0, domain.length - tld.length);
 | 
						|
    let secondLevelDomain = domainWithoutTLD.split(".").at(-2);
 | 
						|
 | 
						|
    return secondLevelDomain ? `${secondLevelDomain}.${tld}` : "";
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Per a request from Data Science, we need to limit the number of domains
 | 
						|
   * categorized to 10 non-ad domains and 10 ad domains.
 | 
						|
   *
 | 
						|
   * @param {number} nDomains The number of domains processed.
 | 
						|
   * @returns {boolean} Whether or not the threshold was exceeded.
 | 
						|
   */
 | 
						|
  #exceedsThreshold(nDomains) {
 | 
						|
    return nDomains >= CATEGORIZATION_SETTINGS.MAX_DOMAINS_TO_CATEGORIZE;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
export const domainExtractor = new DomainExtractor();
 | 
						|
const searchProviders = new SearchProviders();
 | 
						|
const searchAdImpression = new SearchAdImpression();
 | 
						|
 | 
						|
const documentToEventCallbackMap = new WeakMap();
 | 
						|
const documentToRemoveEventListenersMap = new WeakMap();
 | 
						|
const documentToSubmitMap = new WeakMap();
 | 
						|
 | 
						|
/**
 | 
						|
 * SearchTelemetryChild monitors for pages that are partner searches, and
 | 
						|
 * looks through them to find links which looks like adverts and sends back
 | 
						|
 * a notification to SearchTelemetry for possible telemetry reporting.
 | 
						|
 *
 | 
						|
 * Only the partner details and the fact that at least one ad was found on the
 | 
						|
 * page are returned to SearchTelemetry. If no ads are found, no notification is
 | 
						|
 * given.
 | 
						|
 */
 | 
						|
export class SearchSERPTelemetryChild extends JSWindowActorChild {
 | 
						|
  /**
 | 
						|
   * Amount of time to wait after a page event before examining the page
 | 
						|
   * for ads.
 | 
						|
   *
 | 
						|
   * @type {number | null}
 | 
						|
   */
 | 
						|
  #adTimeout;
 | 
						|
  /**
 | 
						|
   * Determines if there is a provider that matches the supplied URL and returns
 | 
						|
   * the information associated with that provider.
 | 
						|
   *
 | 
						|
   * @param {string} url The url to check
 | 
						|
   * @returns {array|null} Returns null if there's no match, otherwise an array
 | 
						|
   *   of provider name and the provider information.
 | 
						|
   */
 | 
						|
  _getProviderInfoForUrl(url) {
 | 
						|
    return searchProviders.info?.find(info => info.searchPageRegexp.test(url));
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Checks to see if the page is a partner and has an ad link within it. If so,
 | 
						|
   * it will notify SearchTelemetry.
 | 
						|
   */
 | 
						|
  _checkForAdLink(eventType) {
 | 
						|
    try {
 | 
						|
      if (!this.contentWindow) {
 | 
						|
        return;
 | 
						|
      }
 | 
						|
    } catch (ex) {
 | 
						|
      // unload occurred before the timer expired
 | 
						|
      return;
 | 
						|
    }
 | 
						|
 | 
						|
    let doc = this.document;
 | 
						|
    let url = doc.documentURI;
 | 
						|
    let providerInfo = this._getProviderInfoForUrl(url);
 | 
						|
    if (!providerInfo) {
 | 
						|
      return;
 | 
						|
    }
 | 
						|
 | 
						|
    let regexps = providerInfo.extraAdServersRegexps;
 | 
						|
    let anchors = doc.getElementsByTagName("a");
 | 
						|
    let hasAds = false;
 | 
						|
    for (let anchor of anchors) {
 | 
						|
      if (!anchor.href) {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
      for (let name of providerInfo.adServerAttributes) {
 | 
						|
        hasAds = regexps.some(regexp => regexp.test(anchor.dataset[name]));
 | 
						|
        if (hasAds) {
 | 
						|
          break;
 | 
						|
        }
 | 
						|
      }
 | 
						|
      if (!hasAds) {
 | 
						|
        hasAds = regexps.some(regexp => regexp.test(anchor.href));
 | 
						|
      }
 | 
						|
      if (hasAds) {
 | 
						|
        break;
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    if (hasAds) {
 | 
						|
      this.sendAsyncMessage("SearchTelemetry:PageInfo", {
 | 
						|
        hasAds,
 | 
						|
        url,
 | 
						|
      });
 | 
						|
    }
 | 
						|
 | 
						|
    if (
 | 
						|
      providerInfo.components?.length &&
 | 
						|
      (eventType == "load" || eventType == "pageshow")
 | 
						|
    ) {
 | 
						|
      // Start performance measurements.
 | 
						|
      let start = Cu.now();
 | 
						|
      let timerId = Glean.serp.categorizationDuration.start();
 | 
						|
 | 
						|
      let pageActionCallback = info => {
 | 
						|
        if (info.action == "submitted") {
 | 
						|
          documentToSubmitMap.set(doc, true);
 | 
						|
        }
 | 
						|
        this.sendAsyncMessage("SearchTelemetry:Action", {
 | 
						|
          target: info.target,
 | 
						|
          url: info.url,
 | 
						|
          action: info.action,
 | 
						|
        });
 | 
						|
      };
 | 
						|
      documentToEventCallbackMap.set(this.document, pageActionCallback);
 | 
						|
 | 
						|
      let componentToVisibilityMap, hrefToComponentMap;
 | 
						|
      try {
 | 
						|
        let result = searchAdImpression.categorize(anchors, doc);
 | 
						|
        componentToVisibilityMap = result.componentToVisibilityMap;
 | 
						|
        hrefToComponentMap = result.hrefToComponentMap;
 | 
						|
      } catch (e) {
 | 
						|
        // Cancel the timer if an error encountered.
 | 
						|
        Glean.serp.categorizationDuration.cancel(timerId);
 | 
						|
      }
 | 
						|
 | 
						|
      if (componentToVisibilityMap && hrefToComponentMap) {
 | 
						|
        // End measurements.
 | 
						|
        ChromeUtils.addProfilerMarker(
 | 
						|
          "SearchSERPTelemetryChild._checkForAdLink",
 | 
						|
          start,
 | 
						|
          "Checked anchors for visibility"
 | 
						|
        );
 | 
						|
        Glean.serp.categorizationDuration.stopAndAccumulate(timerId);
 | 
						|
        this.sendAsyncMessage("SearchTelemetry:AdImpressions", {
 | 
						|
          adImpressions: componentToVisibilityMap,
 | 
						|
          hrefToComponentMap,
 | 
						|
          url,
 | 
						|
        });
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    if (
 | 
						|
      lazy.serpEventTelemetryCategorization &&
 | 
						|
      providerInfo.domainExtraction &&
 | 
						|
      (eventType == "load" || eventType == "pageshow")
 | 
						|
    ) {
 | 
						|
      let start = Cu.now();
 | 
						|
      let nonAdDomains = domainExtractor.extractDomainsFromDocument(
 | 
						|
        doc,
 | 
						|
        providerInfo.domainExtraction.nonAds,
 | 
						|
        providerInfo.telemetryId
 | 
						|
      );
 | 
						|
      let adDomains = domainExtractor.extractDomainsFromDocument(
 | 
						|
        doc,
 | 
						|
        providerInfo.domainExtraction.ads,
 | 
						|
        providerInfo.telemetryId
 | 
						|
      );
 | 
						|
 | 
						|
      this.sendAsyncMessage("SearchTelemetry:Domains", {
 | 
						|
        url,
 | 
						|
        nonAdDomains,
 | 
						|
        adDomains,
 | 
						|
      });
 | 
						|
 | 
						|
      ChromeUtils.addProfilerMarker(
 | 
						|
        "SearchSERPTelemetryChild._checkForAdLink",
 | 
						|
        start,
 | 
						|
        "Extract domains from elements"
 | 
						|
      );
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Checks for the presence of certain components on the page that are
 | 
						|
   * required for recording the page impression.
 | 
						|
   */
 | 
						|
  #checkForPageImpressionComponents() {
 | 
						|
    let url = this.document.documentURI;
 | 
						|
    let providerInfo = this._getProviderInfoForUrl(url);
 | 
						|
    if (providerInfo.components?.length) {
 | 
						|
      searchAdImpression.providerInfo = providerInfo;
 | 
						|
      let start = Cu.now();
 | 
						|
      let shoppingTabDisplayed = searchAdImpression.hasShoppingTab(
 | 
						|
        this.document
 | 
						|
      );
 | 
						|
      ChromeUtils.addProfilerMarker(
 | 
						|
        "SearchSERPTelemetryChild.#recordImpression",
 | 
						|
        start,
 | 
						|
        "Checked for shopping tab"
 | 
						|
      );
 | 
						|
      this.sendAsyncMessage("SearchTelemetry:PageImpression", {
 | 
						|
        url,
 | 
						|
        shoppingTabDisplayed,
 | 
						|
      });
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  #removeEventListeners() {
 | 
						|
    let callbacks = documentToRemoveEventListenersMap.get(this.document);
 | 
						|
    if (callbacks) {
 | 
						|
      for (let callback of callbacks) {
 | 
						|
        callback();
 | 
						|
      }
 | 
						|
      documentToRemoveEventListenersMap.delete(this.document);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  /**
 | 
						|
   * Handles events received from the actor child notifications.
 | 
						|
   *
 | 
						|
   * @param {object} event The event details.
 | 
						|
   */
 | 
						|
  handleEvent(event) {
 | 
						|
    if (!this.#urlIsSERP(this.document.documentURI)) {
 | 
						|
      return;
 | 
						|
    }
 | 
						|
    switch (event.type) {
 | 
						|
      case "pageshow": {
 | 
						|
        // If a page is loaded from the bfcache, we won't get a "DOMContentLoaded"
 | 
						|
        // event, so we need to rely on "pageshow" in this case. Note: we do this
 | 
						|
        // so that we remain consistent with the *.in-content:sap* count for the
 | 
						|
        // SEARCH_COUNTS histogram.
 | 
						|
        if (event.persisted) {
 | 
						|
          this.#checkForPageImpressionComponents();
 | 
						|
          this.#check(event.type);
 | 
						|
        }
 | 
						|
        break;
 | 
						|
      }
 | 
						|
      case "DOMContentLoaded": {
 | 
						|
        this.#checkForPageImpressionComponents();
 | 
						|
        this.#check(event.type);
 | 
						|
        break;
 | 
						|
      }
 | 
						|
      case "load": {
 | 
						|
        // We check both DOMContentLoaded and load in case the page has
 | 
						|
        // taken a long time to load and the ad is only detected on load.
 | 
						|
        // We still check at DOMContentLoaded because if the page hasn't
 | 
						|
        // finished loading and the user navigates away, we still want to know
 | 
						|
        // if there were ads on the page or not at that time.
 | 
						|
        this.#check(event.type);
 | 
						|
        break;
 | 
						|
      }
 | 
						|
      case "pagehide": {
 | 
						|
        let callbacks = documentToRemoveEventListenersMap.get(this.document);
 | 
						|
        if (callbacks) {
 | 
						|
          for (let removeEventListenerCallback of callbacks) {
 | 
						|
            removeEventListenerCallback();
 | 
						|
          }
 | 
						|
          documentToRemoveEventListenersMap.delete(this.document);
 | 
						|
        }
 | 
						|
        this.#cancelCheck();
 | 
						|
        break;
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  async receiveMessage(message) {
 | 
						|
    switch (message.name) {
 | 
						|
      case "SearchSERPTelemetry:WaitForSPAPageLoad":
 | 
						|
        lazy.setTimeout(() => {
 | 
						|
          this.#checkForPageImpressionComponents();
 | 
						|
          this._checkForAdLink("load");
 | 
						|
        }, Services.cpmm.sharedData.get(SEARCH_TELEMETRY_SHARED.SPA_LOAD_TIMEOUT));
 | 
						|
        break;
 | 
						|
      case "SearchSERPTelemetry:StopTrackingDocument":
 | 
						|
        this.#removeDocumentFromSubmitMap();
 | 
						|
        this.#removeEventListeners();
 | 
						|
        break;
 | 
						|
      case "SearchSERPTelemetry:DidSubmit":
 | 
						|
        return this.#didSubmit();
 | 
						|
    }
 | 
						|
    return null;
 | 
						|
  }
 | 
						|
 | 
						|
  #didSubmit() {
 | 
						|
    return documentToSubmitMap.get(this.document);
 | 
						|
  }
 | 
						|
 | 
						|
  #removeDocumentFromSubmitMap() {
 | 
						|
    documentToSubmitMap.delete(this.document);
 | 
						|
  }
 | 
						|
 | 
						|
  #urlIsSERP(url) {
 | 
						|
    let provider = this._getProviderInfoForUrl(this.document.documentURI);
 | 
						|
    if (provider) {
 | 
						|
      // Some URLs can match provider info but also be the provider's homepage
 | 
						|
      // instead of a SERP.
 | 
						|
      // e.g. https://example.com/ vs. https://example.com/?foo=bar
 | 
						|
      // To check this, we look for the presence of the query parameter
 | 
						|
      // that contains a search term.
 | 
						|
      let queries = new URLSearchParams(url.split("#")[0].split("?")[1]);
 | 
						|
      for (let queryParamName of provider.queryParamNames) {
 | 
						|
        if (queries.get(queryParamName)) {
 | 
						|
          return true;
 | 
						|
        }
 | 
						|
      }
 | 
						|
    }
 | 
						|
    return false;
 | 
						|
  }
 | 
						|
 | 
						|
  #cancelCheck() {
 | 
						|
    if (this._waitForContentTimeout) {
 | 
						|
      lazy.clearTimeout(this._waitForContentTimeout);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  #check(eventType) {
 | 
						|
    if (!this.#adTimeout) {
 | 
						|
      this.#adTimeout = Services.cpmm.sharedData.get(
 | 
						|
        SEARCH_TELEMETRY_SHARED.LOAD_TIMEOUT
 | 
						|
      );
 | 
						|
    }
 | 
						|
    this.#cancelCheck();
 | 
						|
    this._waitForContentTimeout = lazy.setTimeout(() => {
 | 
						|
      this._checkForAdLink(eventType);
 | 
						|
    }, this.#adTimeout);
 | 
						|
  }
 | 
						|
}
 |