forked from mirrors/gecko-dev
		
	
		
			
				
	
	
		
			286 lines
		
	
	
	
		
			8.4 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			286 lines
		
	
	
	
		
			8.4 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| /* This Source Code Form is subject to the terms of the Mozilla Public
 | |
|  * License, v. 2.0. If a copy of the MPL was not distributed with this
 | |
|  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 | |
| 
 | |
| import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
 | |
| 
 | |
| const lazy = {};
 | |
| 
 | |
| ChromeUtils.defineESModuleGetters(lazy, {
 | |
|   UrlbarUtils: "resource:///modules/UrlbarUtils.sys.mjs",
 | |
| });
 | |
| 
 | |
| XPCOMUtils.defineLazyModuleGetters(lazy, {
 | |
|   FilterAdult: "resource://activity-stream/lib/FilterAdult.jsm",
 | |
| });
 | |
| 
 | |
// Lazily create the console logger for this module. Verbose ("Debug") output
// is enabled by the "browser.places.interactions.log" pref; otherwise only
// warnings and above are logged.
ChromeUtils.defineLazyGetter(lazy, "logConsole", () => {
  const debugLoggingEnabled = Services.prefs.getBoolPref(
    "browser.places.interactions.log",
    false
  );
  const maxLogLevel = debugLoggingEnabled ? "Debug" : "Warn";
  return console.createInstance({
    prefix: "InteractionsBlocklist",
    maxLogLevel,
  });
});
 | |
| 
 | |
// A blocklist of regular expressions. Maps base hostnames to a list of regular
// expressions for URLs with that base hostname. In this context, "base
// hostname" means the hostname without any subdomains or a public suffix. For
// example, the base hostname for "https://www.maps.google.com/a/place" is
// "google". We do this mapping to improve performance; otherwise we'd have to
// check all URLs against a long list of regular expressions. The regexes are
// defined as escaped strings so that we build them lazily.
// The special "*" key holds custom (wildcard) regexes that are checked against
// every URL regardless of its base hostname.
// We may want to migrate this list to Remote Settings in the future.
//
// The table is wrapped in a Proxy whose `get` trap compiles the string
// patterns of a host into case-insensitive RegExp objects on first access and
// caches them in place. Reading a key that is not an array (including unknown
// hosts and inherited Object properties) yields null.
const HOST_BLOCKLIST = new Proxy(
  {
    auth0: [
      // Auth0 OAuth.
      // XXX: Used alone this could produce false positives where an auth0 URL
      // appears after another valid domain and TLD, but since we limit this to
      // the auth0 hostname those occurrences will be filtered out.
      "^https:\\/\\/.*\\.auth0\\.com\\/login",
    ],
    baidu: [
      // Baidu SERP
      "^(https?:\\/\\/)?(www\\.)?baidu\\.com\\/s.*(\\?|&)wd=.*",
    ],
    bing: [
      // Bing SERP
      "^(https?:\\/\\/)?(www\\.)?bing\\.com\\/search.*(\\?|&)q=.*",
    ],
    duckduckgo: [
      // DuckDuckGo SERP
      "^(https?:\\/\\/)?(www\\.)?duckduckgo\\.com\\/.*(\\?|&)q=.*",
    ],
    google: [
      // Google SERP
      "^(https?:\\/\\/)?(www\\.)?google\\.(\\w|\\.){2,}\\/search.*(\\?|&)q=.*",
      // Google OAuth
      "^https:\\/\\/accounts\\.google\\.com\\/o\\/oauth2\\/v2\\/auth",
      "^https:\\/\\/accounts\\.google\\.com\\/signin\\/oauth\\/consent",
    ],
    microsoftonline: [
      // Microsoft OAuth
      "^https:\\/\\/login\\.microsoftonline\\.com\\/common\\/oauth2\\/v2\\.0\\/authorize",
    ],
    yandex: [
      // Yandex SERP
      "^(https?:\\/\\/)?(www\\.)?yandex\\.(\\w|\\.){2,}\\/search.*(\\?|&)text=.*",
    ],
    zoom: [
      // Zoom meeting interstitial
      "^(https?:\\/\\/)?(www\\.)?.*\\.zoom\\.us\\/j\\/\\d+",
    ],
  },
  {
    get(target, property) {
      let regexes = target[property];
      if (!Array.isArray(regexes)) {
        return null;
      }

      for (let i = 0; i < regexes.length; i++) {
        if (typeof regexes[i] !== "string") {
          // Already compiled (or supplied as a RegExp); nothing to do.
          continue;
        }
        try {
          // Cache the compiled regex so each pattern is built at most once.
          regexes[i] = new RegExp(regexes[i], "i");
        } catch (ex) {
          // `new RegExp` throws on a bad pattern rather than returning a falsy
          // value, so the failure has to be translated here.
          throw new Error("Blocklist contains invalid regex.", { cause: ex });
        }
      }
      return regexes;
    },
  }
);
 | |
| 
 | |
/**
 * Rebuilds a RegExp from an entry of the custom blocklist pref.
 *
 * Entries written by this module are `RegExp.prototype.toString()` output,
 * i.e. "/source/flags". Passing such a string straight to `new RegExp` would
 * produce a pattern that matches the literal slashes and flags, so the
 * literal form is unpacked first. Anything else is treated as a plain pattern.
 *
 * @param {string} regexStr
 *   The stored representation of the regular expression.
 * @returns {RegExp}
 *   The reconstructed regular expression.
 * @throws {SyntaxError}
 *   If the entry is not a valid regular expression.
 */
function parseStoredRegex(regexStr) {
  let literal = /^\/(.+)\/([a-z]*)$/.exec(regexStr);
  if (literal) {
    try {
      return new RegExp(literal[1], literal[2]);
    } catch (ex) {
      // Not a serialized literal after all (e.g. a plain pattern that merely
      // starts with a slash); fall back to treating it as a plain pattern.
    }
  }
  return new RegExp(regexStr);
}

/**
 * Writes the current wildcard ("*") regexes to the custom blocklist pref.
 */
function saveCustomBlocklist() {
  Services.prefs.setStringPref(
    "places.interactions.customBlocklist",
    JSON.stringify(HOST_BLOCKLIST["*"].map(reg => reg.toString()))
  );
}

/**
 * A class that maintains a blocklist of URLs. The class exposes a method to
 * check if a particular URL is contained on the blocklist.
 */
class _InteractionsBlocklist {
  constructor() {
    // Load custom blocklist items from pref.
    try {
      let customBlocklist = JSON.parse(
        Services.prefs.getStringPref(
          "places.interactions.customBlocklist",
          "[]"
        )
      );
      if (!Array.isArray(customBlocklist)) {
        throw new Error();
      }
      HOST_BLOCKLIST["*"] = customBlocklist.map(parseStoredRegex);
    } catch (ex) {
      lazy.logConsole.warn("places.interactions.customBlocklist is corrupted.");
    }
  }

  /**
   * Only certain urls can be added as Interactions, either manually or
   * automatically.
   *
   * @returns {Map} A Map keyed by protocol, for each protocol an object may
   *          define stricter requirements, like extension.
   */
  get urlRequirements() {
    return new Map([
      ["http:", {}],
      ["https:", {}],
      ["file:", { extension: "pdf" }],
    ]);
  }

  /**
   * Whether to record interactions for a given URL.
   * The rules are defined in InteractionsBlocklist.urlRequirements.
   *
   * @param {string|URL|nsIURI} url The URL to check.
   * @returns {boolean} whether the url can be recorded.
   * @throws {TypeError} if a string is passed that is not a valid URL.
   */
  canRecordUrl(url) {
    let protocol, pathname;
    if (typeof url == "string") {
      url = new URL(url);
    }
    if (url instanceof Ci.nsIURI) {
      protocol = url.scheme + ":";
      pathname = url.filePath;
    } else {
      protocol = url.protocol;
      pathname = url.pathname;
    }
    let requirements = InteractionsBlocklist.urlRequirements.get(protocol);
    if (!requirements) {
      // Unknown protocol: always return a real boolean, never undefined.
      return false;
    }
    return (
      !requirements.extension || pathname.endsWith(requirements.extension)
    );
  }

  /**
   * Checks a URL against a blocklist of URLs. If the URL is blocklisted, we
   * should not record an interaction.
   *
   * @param {string} urlToCheck
   *   The URL we are looking for on the blocklist.
   * @returns {boolean}
   *  True if `url` is on a blocklist. False otherwise.
   */
  isUrlBlocklisted(urlToCheck) {
    if (lazy.FilterAdult.isAdultUrl(urlToCheck)) {
      return true;
    }

    if (!this.canRecordUrl(urlToCheck)) {
      return true;
    }

    // First, find the URL's base host: the hostname without any subdomains or a
    // public suffix.
    let url;
    try {
      url = new URL(urlToCheck);
    } catch (ex) {
      // `url` is still unassigned here, so report the input that failed.
      lazy.logConsole.warn(
        `Invalid URL passed to InteractionsBlocklist.isUrlBlocklisted: ${urlToCheck}`
      );
      return false;
    }

    if (url.protocol == "file:") {
      return false;
    }

    let hostWithoutSuffix = lazy.UrlbarUtils.stripPublicSuffixFromHost(
      url.host
    );
    let [hostWithSubdomains] = lazy.UrlbarUtils.stripPrefixAndTrim(
      hostWithoutSuffix,
      {
        stripWww: true,
        trimTrailingDot: true,
      }
    );
    let baseHost = hostWithSubdomains.substring(
      hostWithSubdomains.lastIndexOf(".") + 1
    );
    // Then fetch blocked regexes for that baseHost and compare them to the full
    // URL. Also check the URL against the custom blocklist.
    // The per-host array is the cached list owned by HOST_BLOCKLIST, so build a
    // fresh array instead of pushing the wildcard entries into it; otherwise
    // the cached list grows on every call and keeps removed custom regexes.
    let hostRegexes = HOST_BLOCKLIST[baseHost.toLocaleLowerCase()] ?? [];
    let wildcardRegexes = HOST_BLOCKLIST["*"] ?? [];

    return [...hostRegexes, ...wildcardRegexes].some(r => r.test(url.href));
  }

  /**
   * Adds a regex to HOST_BLOCKLIST. Since we can't parse the base host from
   * the regex, we add it to a list of wildcard regexes. All URLs are checked
   * against these wildcard regexes. Currently only exposed for tests and use in
   * the console. In the future we could hook this up to a UI component.
   *
   * @param {string|RegExp} regexToAdd
   *   The regular expression to add to our blocklist.
   */
  addRegexToBlocklist(regexToAdd) {
    let regex;
    try {
      regex = new RegExp(regexToAdd, "i");
    } catch (ex) {
      lazy.logConsole.warn("Invalid regex passed to addRegexToBlocklist.");
      return;
    }

    if (!HOST_BLOCKLIST["*"]) {
      HOST_BLOCKLIST["*"] = [];
    }
    HOST_BLOCKLIST["*"].push(regex);
    saveCustomBlocklist();
  }

  /**
   * Removes a regex from HOST_BLOCKLIST. If `regexToRemove` is not in the
   * blocklist, this is a no-op. Currently only exposed for tests and use in the
   * console. In the future we could hook this up to a UI component.
   *
   * @param {string|RegExp} regexToRemove
   *   The regular expression to remove from our blocklist.
   */
  removeRegexFromBlocklist(regexToRemove) {
    let regex;
    try {
      regex = new RegExp(regexToRemove, "i");
    } catch (ex) {
      lazy.logConsole.warn(
        "Invalid regex passed to removeRegexFromBlocklist."
      );
      return;
    }

    if (!HOST_BLOCKLIST["*"] || !Array.isArray(HOST_BLOCKLIST["*"])) {
      return;
    }
    // Entries are matched by pattern source, ignoring flags.
    HOST_BLOCKLIST["*"] = HOST_BLOCKLIST["*"].filter(
      curr => curr.source != regex.source
    );
    saveCustomBlocklist();
  }
}
 | |
| 
 | |
| export const InteractionsBlocklist = new _InteractionsBlocklist();
 | 
