From 0d0dbf9c30b81dffb8d59273fddffae8a4fdc985 Mon Sep 17 00:00:00 2001 From: James Teow Date: Tue, 27 Feb 2024 14:05:22 +0000 Subject: [PATCH] Bug 1881675 - Allow the ability to ignore specific urls in the network observer - r=scunnane In rare cases, we may need to add event listeners in the content process which also trigger a network event that would be parsed by the network observer. To avoid counting double engagements, I propose we add a new array of regular expressions that when a URL matches, will never be parsed by the SERP telemetry logic in the network observer. This would rely on the content process events listeners to register the engagement event. I think the regular expression should be specific enough that it shouldn't match something in a search result lest we ignore a click on it. Differential Revision: https://phabricator.services.mozilla.com/D202583 --- .../search/SearchSERPTelemetry.sys.mjs | 9 + .../test/browser/telemetry/browser.toml | 3 + ...ser_search_telemetry_engagement_content.js | 8 +- ..._telemetry_engagement_ignoreLinkRegexps.js | 223 ++++++++++++++++++ .../search/test/browser/telemetry/head.js | 5 + ...rchTelemetryAd_searchbox_with_content.html | 6 +- 6 files changed, 247 insertions(+), 7 deletions(-) create mode 100644 browser/components/search/test/browser/telemetry/browser_search_telemetry_engagement_ignoreLinkRegexps.js diff --git a/browser/components/search/SearchSERPTelemetry.sys.mjs b/browser/components/search/SearchSERPTelemetry.sys.mjs index 62f5b65a626b..877153756471 100644 --- a/browser/components/search/SearchSERPTelemetry.sys.mjs +++ b/browser/components/search/SearchSERPTelemetry.sys.mjs @@ -404,6 +404,10 @@ class TelemetryHandler { ); } + newProvider.ignoreLinkRegexps = provider.ignoreLinkRegexps?.length + ? provider.ignoreLinkRegexps.map(r => new RegExp(r)) + : []; + newProvider.nonAdsLinkRegexps = provider.nonAdsLinkRegexps?.length ? provider.nonAdsLinkRegexps.map(r => new RegExp(r)) : []; @@ -1331,6 +1335,11 @@ class ContentHandler { let originURL = wrappedChannel.originURI?.spec; let url = wrappedChannel.finalURL; + + if (info.ignoreLinkRegexps.some(r => r.test(url))) { + return; + } + // Some channels re-direct by loading pages that return 200. The result // is the channel will have an originURL that changes from the SERP to // either a nonAdsRegexp or an extraAdServersRegexps. This is typical diff --git a/browser/components/search/test/browser/telemetry/browser.toml b/browser/components/search/test/browser/telemetry/browser.toml index 9e056d7c467d..624871cccc49 100644 --- a/browser/components/search/test/browser/telemetry/browser.toml +++ b/browser/components/search/test/browser/telemetry/browser.toml @@ -97,6 +97,9 @@ support-files = ["searchTelemetryAd_searchbox_with_content.html", "serp.css"] ["browser_search_telemetry_engagement_eventListeners_parent.js"] support-files = ["searchTelemetryAd_searchbox_with_content.html", "serp.css"] +["browser_search_telemetry_engagement_ignoreLinkRegexps.js"] +support-files = ["searchTelemetryAd_searchbox_with_content.html", "serp.css"] + ["browser_search_telemetry_engagement_multiple_tabs.js"] support-files = [ "searchTelemetryAd_searchbox_with_content.html", diff --git a/browser/components/search/test/browser/telemetry/browser_search_telemetry_engagement_content.js b/browser/components/search/test/browser/telemetry/browser_search_telemetry_engagement_content.js index a7ea62ebd5cc..f94e6b0bd8be 100644 --- a/browser/components/search/test/browser/telemetry/browser_search_telemetry_engagement_content.js +++ b/browser/components/search/test/browser/telemetry/browser_search_telemetry_engagement_content.js @@ -138,8 +138,8 @@ add_task(async function test_click_tab() { { impression: { provider: "example", - tagged: "false", - partner_code: "", + tagged: "true", + partner_code: "ff", source: "unknown", is_shopping_page: "false", is_private: "false", @@ -217,8 +217,8 @@ add_task(async function test_click_shopping() { { impression: { provider: "example", - tagged: "false", - partner_code: "", + tagged: "true", + partner_code: "ff", source: "unknown", is_shopping_page: "true", is_private: "false", diff --git a/browser/components/search/test/browser/telemetry/browser_search_telemetry_engagement_ignoreLinkRegexps.js b/browser/components/search/test/browser/telemetry/browser_search_telemetry_engagement_ignoreLinkRegexps.js new file mode 100644 index 000000000000..10f2a2d8366e --- /dev/null +++ b/browser/components/search/test/browser/telemetry/browser_search_telemetry_engagement_ignoreLinkRegexps.js @@ -0,0 +1,223 @@ +/* Any copyright is dedicated to the Public Domain. + * http://creativecommons.org/publicdomain/zero/1.0/ */ + +"use strict"; + +/** + * Tests ignoreLinkRegexps property in search telemetry that explicitly results + * in our network code ignoring the link. The main reason for doing so is for + * rare situations where we need to find a components from a topDown approach + * but it loads a page in the network process. + */ + +const TEST_PROVIDER_INFO = [ + { + telemetryId: "example", + searchPageRegexp: + /^https:\/\/example.org\/browser\/browser\/components\/search\/test\/browser\/telemetry\/searchTelemetryAd/, + queryParamNames: ["s"], + codeParamName: "abc", + taggedCodes: ["ff"], + adServerAttributes: ["mozAttr"], + extraAdServersRegexps: [/^https:\/\/example\.com\/ad/], + ignoreLinkRegexps: [ + /^https:\/\/example.org\/browser\/browser\/components\/search\/test\/browser\/telemetry\/searchTelemetryAd_searchbox_with_content.html\?s=test&page=images/, + /^https:\/\/example.org\/browser\/browser\/components\/search\/test\/browser\/telemetry\/searchTelemetryAd_searchbox_with_content.html\?s=test&page=shopping/, + ], + components: [ + { + type: SearchSERPTelemetryUtils.COMPONENTS.AD_LINK, + default: true, + }, + ], + hrefToComponentMapAugmentation: [ + { + action: "clicked_something", + target: SearchSERPTelemetryUtils.COMPONENTS.REFINED_SEARCH_BUTTONS, + url: "https://example.org/browser/browser/components/search/test/browser/telemetry/searchTelemetryAd_searchbox.html", + }, + ], + }, +]; + +// The impression doesn't change in these tests. +const IMPRESSION = { + provider: "example", + tagged: "true", + partner_code: "ff", + source: "unknown", + is_shopping_page: "false", + is_private: "false", + shopping_tab_displayed: "false", +}; + +const SERP_URL = getSERPUrl("searchTelemetryAd_searchbox_with_content.html"); + +async function replaceIncludedProperty(included) { + TEST_PROVIDER_INFO[0].components = [ + { + type: SearchSERPTelemetryUtils.COMPONENTS.REFINED_SEARCH_BUTTONS, + included, + topDown: true, + }, + ]; + SearchSERPTelemetry.overrideSearchTelemetryForTests(TEST_PROVIDER_INFO); + await waitForIdle(); +} + +add_setup(async function () { + SearchSERPTelemetry.overrideSearchTelemetryForTests(TEST_PROVIDER_INFO); + await waitForIdle(); + // Enable local telemetry recording for the duration of the tests. + let oldCanRecord = Services.telemetry.canRecordExtended; + Services.telemetry.canRecordExtended = true; + + registerCleanupFunction(async () => { + SearchSERPTelemetry.overrideSearchTelemetryForTests(); + Services.telemetry.canRecordExtended = oldCanRecord; + resetTelemetry(); + }); +}); + +add_task(async function test_click_link_1_matching_ignore_link_regexps() { + resetTelemetry(); + + let { tab, cleanup } = await openSerpInNewTab(SERP_URL); + + let promise = BrowserTestUtils.browserLoaded(tab.linkedBrowser); + await BrowserTestUtils.synthesizeMouseAtCenter( + "#images", + {}, + tab.linkedBrowser + ); + await promise; + + assertSERPTelemetry([ + { + impression: IMPRESSION, + abandonment: { + reason: SearchSERPTelemetryUtils.ABANDONMENTS.NAVIGATION, + }, + }, + { + impression: IMPRESSION, + }, + ]); + + await cleanup(); +}); + +add_task(async function test_click_link_2_matching_ignore_link_regexps() { + resetTelemetry(); + + let { tab, cleanup } = await openSerpInNewTab(SERP_URL); + + let promise = BrowserTestUtils.browserLoaded(tab.linkedBrowser); + await BrowserTestUtils.synthesizeMouseAtCenter( + "#shopping", + {}, + tab.linkedBrowser + ); + await promise; + + assertSERPTelemetry([ + { + impression: IMPRESSION, + abandonment: { + reason: SearchSERPTelemetryUtils.ABANDONMENTS.NAVIGATION, + }, + }, + { + impression: IMPRESSION, + }, + ]); + + await cleanup(); +}); + +add_task(async function test_click_link_3_not_matching_ignore_link_regexps() { + resetTelemetry(); + + let { tab, cleanup } = await openSerpInNewTab(SERP_URL); + + let promise = BrowserTestUtils.browserLoaded(tab.linkedBrowser); + await BrowserTestUtils.synthesizeMouseAtCenter( + "#extra", + {}, + tab.linkedBrowser + ); + await promise; + + assertSERPTelemetry([ + { + impression: IMPRESSION, + engagements: [ + { + action: "clicked", + target: SearchSERPTelemetryUtils.COMPONENTS.NON_ADS_LINK, + }, + ], + }, + { + impression: IMPRESSION, + }, + ]); + + await cleanup(); +}); + +add_task(async function test_click_listener_with_ignore_link_regexps() { + resetTelemetry(); + + TEST_PROVIDER_INFO[0].components = [ + { + type: SearchSERPTelemetryUtils.COMPONENTS.REFINED_SEARCH_BUTTONS, + topDown: true, + included: { + parent: { + selector: "nav a", + skipCount: true, + eventListeners: [ + { + eventType: "click", + action: "clicked", + }, + ], + }, + }, + }, + { + type: SearchSERPTelemetryUtils.COMPONENTS.AD_LINK, + default: true, + }, + ]; + SearchSERPTelemetry.overrideSearchTelemetryForTests(TEST_PROVIDER_INFO); + await waitForIdle(); + + let { tab, cleanup } = await openSerpInNewTab(SERP_URL); + + let promise = BrowserTestUtils.browserLoaded(tab.linkedBrowser); + await BrowserTestUtils.synthesizeMouseAtCenter( + "#images", + {}, + tab.linkedBrowser + ); + await promise; + + assertSERPTelemetry([ + { + impression: IMPRESSION, + engagements: [ + { + action: "clicked", + target: SearchSERPTelemetryUtils.COMPONENTS.REFINED_SEARCH_BUTTONS, + }, + ], + }, + { + impression: IMPRESSION, + }, + ]); + + await cleanup(); +}); diff --git a/browser/components/search/test/browser/telemetry/head.js b/browser/components/search/test/browser/telemetry/head.js index 07f323f515e7..760489f585dc 100644 --- a/browser/components/search/test/browser/telemetry/head.js +++ b/browser/components/search/test/browser/telemetry/head.js @@ -207,6 +207,11 @@ function resetTelemetry() { * values we use to validate the recorded Glean impression events. */ function assertSERPTelemetry(expectedEvents) { + // Do a deep copy of impressions in case the input is using constants, as + // we insert impression id into the expected events to make it easier to + // run Assert.deepEqual() on the expected and actual result. + expectedEvents = JSON.parse(JSON.stringify(expectedEvents)); + // A single test might run assertImpressionEvents more than once // so the Set needs to be cleared or else the impression event // check will throw. diff --git a/browser/components/search/test/browser/telemetry/searchTelemetryAd_searchbox_with_content.html b/browser/components/search/test/browser/telemetry/searchTelemetryAd_searchbox_with_content.html index 672fe4943515..061300155109 100644 --- a/browser/components/search/test/browser/telemetry/searchTelemetryAd_searchbox_with_content.html +++ b/browser/components/search/test/browser/telemetry/searchTelemetryAd_searchbox_with_content.html @@ -11,9 +11,9 @@