forked from mirrors/gecko-dev
		
	
		
			
				
	
	
		
			398 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			398 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| /* This Source Code Form is subject to the terms of the Mozilla Public
 | |
|  * License, v. 2.0. If a copy of the MPL was not distributed with this
 | |
|  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 | |
| 
 | |
| /*
 | |
|  * This module tests TRR performance by issuing DNS requests to TRRs and
 | |
|  * recording telemetry for the network time for each request.
 | |
|  *
 | |
|  * We test each TRR with 5 random subdomains of a canonical domain and also
 | |
|  * a "popular" domain (which the TRR likely have cached).
 | |
|  *
 | |
|  * To ensure data integrity, we run the requests in an aggregator wrapper
 | |
|  * and collect all the results before sending telemetry. If we detect network
 | |
|  * loss, the results are discarded. A new run is triggered upon detection of
 | |
|  * usable network until a full set of results has been captured. We stop retrying
 | |
|  * after 5 attempts.
 | |
|  */
 | |
| Services.telemetry.setEventRecordingEnabled(
 | |
|   "security.doh.trrPerformance",
 | |
|   true
 | |
| );
 | |
| 
 | |
| import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
 | |
| 
 | |
| const lazy = {};
 | |
| 
 | |
| XPCOMUtils.defineLazyServiceGetter(
 | |
|   lazy,
 | |
|   "gNetworkLinkService",
 | |
|   "@mozilla.org/network/network-link-service;1",
 | |
|   "nsINetworkLinkService"
 | |
| );
 | |
| 
 | |
| XPCOMUtils.defineLazyServiceGetter(
 | |
|   lazy,
 | |
|   "gCaptivePortalService",
 | |
|   "@mozilla.org/network/captive-portal-service;1",
 | |
|   "nsICaptivePortalService"
 | |
| );
 | |
| 
 | |
| // The canonical domain whose subdomains we will be resolving.
 | |
| XPCOMUtils.defineLazyPreferenceGetter(
 | |
|   lazy,
 | |
|   "kCanonicalDomain",
 | |
|   "doh-rollout.trrRace.canonicalDomain",
 | |
|   "firefox-dns-perf-test.net."
 | |
| );
 | |
| 
 | |
| // The number of random subdomains to resolve per TRR.
 | |
| XPCOMUtils.defineLazyPreferenceGetter(
 | |
|   lazy,
 | |
|   "kRepeats",
 | |
|   "doh-rollout.trrRace.randomSubdomainCount",
 | |
|   5
 | |
| );
 | |
| 
 | |
| // The "popular" domain that we expect the TRRs to have cached.
 | |
| XPCOMUtils.defineLazyPreferenceGetter(
 | |
|   lazy,
 | |
|   "kPopularDomains",
 | |
|   "doh-rollout.trrRace.popularDomains",
 | |
|   null,
 | |
|   null,
 | |
|   val =>
 | |
|     val
 | |
|       ? val.split(",").map(t => t.trim())
 | |
|       : [
 | |
|           "google.com.",
 | |
|           "youtube.com.",
 | |
|           "amazon.com.",
 | |
|           "facebook.com.",
 | |
|           "yahoo.com.",
 | |
|         ]
 | |
| );
 | |
| 
 | |
| function getRandomSubdomain() {
 | |
|   let uuid = Services.uuid
 | |
|     .generateUUID()
 | |
|     .toString()
 | |
|     .slice(1, -1); // Discard surrounding braces
 | |
|   return `${uuid}.${lazy.kCanonicalDomain}`;
 | |
| }
 | |
| 
 | |
| // A wrapper around async DNS lookups. The results are passed on to the supplied
 | |
| // callback. The wrapper attempts the lookup 3 times before passing on a failure.
 | |
| // If a false-y `domain` is supplied, a random subdomain will be used. Each retry
 | |
| // will use a different random subdomain to ensure we bypass chached responses.
 | |
| export class DNSLookup {
 | |
|   constructor(domain, trrServer, callback) {
 | |
|     this._domain = domain;
 | |
|     this.trrServer = trrServer;
 | |
|     this.callback = callback;
 | |
|     this.retryCount = 0;
 | |
|   }
 | |
| 
 | |
|   doLookup() {
 | |
|     this.retryCount++;
 | |
|     try {
 | |
|       this.usedDomain = this._domain || getRandomSubdomain();
 | |
|       Services.dns.asyncResolve(
 | |
|         this.usedDomain,
 | |
|         Ci.nsIDNSService.RESOLVE_TYPE_DEFAULT,
 | |
|         Ci.nsIDNSService.RESOLVE_BYPASS_CACHE,
 | |
|         Services.dns.newAdditionalInfo(this.trrServer, -1),
 | |
|         this,
 | |
|         Services.tm.currentThread,
 | |
|         {}
 | |
|       );
 | |
|     } catch (e) {
 | |
|       console.error(e);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   onLookupComplete(request, record, status) {
 | |
|     // Try again if we failed...
 | |
|     if (!Components.isSuccessCode(status) && this.retryCount < 3) {
 | |
|       this.doLookup();
 | |
|       return;
 | |
|     }
 | |
| 
 | |
|     // But after the third try, just pass the status on.
 | |
|     this.callback(request, record, status, this.usedDomain, this.retryCount);
 | |
|   }
 | |
| }
 | |
| 
 | |
| DNSLookup.prototype.QueryInterface = ChromeUtils.generateQI(["nsIDNSListener"]);
 | |
| 
 | |
| // A wrapper around a single set of measurements. The required lookups are
 | |
| // triggered and the results aggregated before telemetry is sent. If aborted,
 | |
| // any aggregated results are discarded.
 | |
| export class LookupAggregator {
 | |
|   constructor(onCompleteCallback, trrList) {
 | |
|     this.onCompleteCallback = onCompleteCallback;
 | |
|     this.trrList = trrList;
 | |
|     this.aborted = false;
 | |
|     this.networkUnstable = false;
 | |
|     this.captivePortal = false;
 | |
| 
 | |
|     this.domains = [];
 | |
|     for (let i = 0; i < lazy.kRepeats; ++i) {
 | |
|       // false-y domain will cause DNSLookup to generate a random one.
 | |
|       this.domains.push(null);
 | |
|     }
 | |
|     this.domains.push(...lazy.kPopularDomains);
 | |
|     this.totalLookups = this.trrList.length * this.domains.length;
 | |
|     this.completedLookups = 0;
 | |
|     this.results = [];
 | |
|   }
 | |
| 
 | |
|   run() {
 | |
|     if (this._ran || this._aborted) {
 | |
|       console.error("Trying to re-run a LookupAggregator.");
 | |
|       return;
 | |
|     }
 | |
| 
 | |
|     this._ran = true;
 | |
|     for (let trr of this.trrList) {
 | |
|       for (let domain of this.domains) {
 | |
|         new DNSLookup(
 | |
|           domain,
 | |
|           trr,
 | |
|           (request, record, status, usedDomain, retryCount) => {
 | |
|             this.results.push({
 | |
|               domain: usedDomain,
 | |
|               trr,
 | |
|               status,
 | |
|               time: record
 | |
|                 ? record.QueryInterface(Ci.nsIDNSAddrRecord)
 | |
|                     .trrFetchDurationNetworkOnly
 | |
|                 : -1,
 | |
|               retryCount,
 | |
|             });
 | |
| 
 | |
|             this.completedLookups++;
 | |
|             if (this.completedLookups == this.totalLookups) {
 | |
|               this.recordResults();
 | |
|             }
 | |
|           }
 | |
|         ).doLookup();
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   abort() {
 | |
|     this.aborted = true;
 | |
|   }
 | |
| 
 | |
|   markUnstableNetwork() {
 | |
|     this.networkUnstable = true;
 | |
|   }
 | |
| 
 | |
|   markCaptivePortal() {
 | |
|     this.captivePortal = true;
 | |
|   }
 | |
| 
 | |
|   recordResults() {
 | |
|     if (this.aborted) {
 | |
|       return;
 | |
|     }
 | |
| 
 | |
|     for (let { domain, trr, status, time, retryCount } of this.results) {
 | |
|       if (
 | |
|         !(
 | |
|           lazy.kPopularDomains.includes(domain) ||
 | |
|           domain.includes(lazy.kCanonicalDomain)
 | |
|         )
 | |
|       ) {
 | |
|         console.error("Expected known domain for reporting, got ", domain);
 | |
|         return;
 | |
|       }
 | |
| 
 | |
|       Services.telemetry.recordEvent(
 | |
|         "security.doh.trrPerformance",
 | |
|         "resolved",
 | |
|         "record",
 | |
|         "success",
 | |
|         {
 | |
|           domain,
 | |
|           trr,
 | |
|           status: status.toString(),
 | |
|           time: time.toString(),
 | |
|           retryCount: retryCount.toString(),
 | |
|           networkUnstable: this.networkUnstable.toString(),
 | |
|           captivePortal: this.captivePortal.toString(),
 | |
|         }
 | |
|       );
 | |
|     }
 | |
| 
 | |
|     this.onCompleteCallback();
 | |
|   }
 | |
| }
 | |
| 
 | |
| // This class monitors the network and spawns a new LookupAggregator when ready.
 | |
| // When the network goes down, an ongoing aggregator is aborted and a new one
 | |
| // spawned next time we get a link, up to 5 times. On the fifth time, we just
 | |
| // let the aggegator complete and mark it as tainted.
 | |
| export class TRRRacer {
 | |
|   constructor(onCompleteCallback, trrList) {
 | |
|     this._aggregator = null;
 | |
|     this._retryCount = 0;
 | |
|     this._complete = false;
 | |
|     this._onCompleteCallback = onCompleteCallback;
 | |
|     this._trrList = trrList;
 | |
|   }
 | |
| 
 | |
|   run() {
 | |
|     if (
 | |
|       lazy.gNetworkLinkService.isLinkUp &&
 | |
|       lazy.gCaptivePortalService.state !=
 | |
|         lazy.gCaptivePortalService.LOCKED_PORTAL
 | |
|     ) {
 | |
|       this._runNewAggregator();
 | |
|       if (
 | |
|         lazy.gCaptivePortalService.state ==
 | |
|         lazy.gCaptivePortalService.UNLOCKED_PORTAL
 | |
|       ) {
 | |
|         this._aggregator.markCaptivePortal();
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     Services.obs.addObserver(this, "ipc:network:captive-portal-set-state");
 | |
|     Services.obs.addObserver(this, "network:link-status-changed");
 | |
|   }
 | |
| 
 | |
|   onComplete() {
 | |
|     Services.obs.removeObserver(this, "ipc:network:captive-portal-set-state");
 | |
|     Services.obs.removeObserver(this, "network:link-status-changed");
 | |
| 
 | |
|     this._complete = true;
 | |
| 
 | |
|     if (this._onCompleteCallback) {
 | |
|       this._onCompleteCallback();
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   getFastestTRR(returnRandomDefault = false) {
 | |
|     if (!this._complete) {
 | |
|       throw new Error("getFastestTRR: Measurement still running.");
 | |
|     }
 | |
| 
 | |
|     return this._getFastestTRRFromResults(
 | |
|       this._aggregator.results,
 | |
|       returnRandomDefault
 | |
|     );
 | |
|   }
 | |
| 
 | |
|   /*
 | |
|    * Given an array of { trr, time }, returns the trr with smallest mean time.
 | |
|    * Separate from _getFastestTRR for easy unit-testing.
 | |
|    *
 | |
|    * @returns The TRR with the fastest average time.
 | |
|    *          If returnRandomDefault is false-y, returns undefined if no valid
 | |
|    *          times were present in the results. Otherwise, returns one of the
 | |
|    *          present TRRs at random.
 | |
|    */
 | |
|   _getFastestTRRFromResults(results, returnRandomDefault = false) {
 | |
|     // First, organize the results into a map of TRR -> array of times
 | |
|     let TRRTimingMap = new Map();
 | |
|     let TRRErrorCount = new Map();
 | |
|     for (let { trr, time } of results) {
 | |
|       if (!TRRTimingMap.has(trr)) {
 | |
|         TRRTimingMap.set(trr, []);
 | |
|       }
 | |
|       if (time != -1) {
 | |
|         TRRTimingMap.get(trr).push(time);
 | |
|       } else {
 | |
|         TRRErrorCount.set(trr, 1 + (TRRErrorCount.get(trr) || 0));
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     // Loop through each TRR's array of times, compute the geometric means,
 | |
|     // and remember the fastest TRR. Geometric mean is a bit more forgiving
 | |
|     // in the presence of noise (anomalously high values).
 | |
|     // We don't need the full geometric mean, we simply calculate the arithmetic
 | |
|     // means in log-space and then compare those values.
 | |
|     let fastestTRR;
 | |
|     let fastestAverageTime = -1;
 | |
|     let trrs = [...TRRTimingMap.keys()];
 | |
|     for (let trr of trrs) {
 | |
|       let times = TRRTimingMap.get(trr);
 | |
|       if (!times.length) {
 | |
|         continue;
 | |
|       }
 | |
| 
 | |
|       // Skip TRRs that had an error rate of more than 30%.
 | |
|       let errorCount = TRRErrorCount.get(trr) || 0;
 | |
|       let totalResults = times.length + errorCount;
 | |
|       if (errorCount / totalResults > 0.3) {
 | |
|         continue;
 | |
|       }
 | |
| 
 | |
|       // Arithmetic mean in log space. Take log of (a + 1) to ensure we never
 | |
|       // take log(0) which would be -Infinity.
 | |
|       let averageTime =
 | |
|         times.map(a => Math.log(a + 1)).reduce((a, b) => a + b) / times.length;
 | |
|       if (fastestAverageTime == -1 || averageTime < fastestAverageTime) {
 | |
|         fastestAverageTime = averageTime;
 | |
|         fastestTRR = trr;
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     if (returnRandomDefault && !fastestTRR) {
 | |
|       fastestTRR = trrs[Math.floor(Math.random() * trrs.length)];
 | |
|     }
 | |
| 
 | |
|     return fastestTRR;
 | |
|   }
 | |
| 
 | |
|   _runNewAggregator() {
 | |
|     this._aggregator = new LookupAggregator(
 | |
|       () => this.onComplete(),
 | |
|       this._trrList
 | |
|     );
 | |
|     this._aggregator.run();
 | |
|     this._retryCount++;
 | |
|   }
 | |
| 
 | |
|   // When the link goes *down*, or when we detect a locked captive portal, we
 | |
|   // abort any ongoing LookupAggregator run. When the link goes *up*, or we
 | |
|   // detect a newly unlocked portal, we start a run if one isn't ongoing.
 | |
|   observe(subject, topic, data) {
 | |
|     switch (topic) {
 | |
|       case "network:link-status-changed":
 | |
|         if (this._aggregator && data == "down") {
 | |
|           if (this._retryCount < 5) {
 | |
|             this._aggregator.abort();
 | |
|           } else {
 | |
|             this._aggregator.markUnstableNetwork();
 | |
|           }
 | |
|         } else if (
 | |
|           data == "up" &&
 | |
|           (!this._aggregator || this._aggregator.aborted)
 | |
|         ) {
 | |
|           this._runNewAggregator();
 | |
|         }
 | |
|         break;
 | |
|       case "ipc:network:captive-portal-set-state":
 | |
|         if (
 | |
|           this._aggregator &&
 | |
|           lazy.gCaptivePortalService.state ==
 | |
|             lazy.gCaptivePortalService.LOCKED_PORTAL
 | |
|         ) {
 | |
|           if (this._retryCount < 5) {
 | |
|             this._aggregator.abort();
 | |
|           } else {
 | |
|             this._aggregator.markCaptivePortal();
 | |
|           }
 | |
|         } else if (
 | |
|           lazy.gCaptivePortalService.state ==
 | |
|             lazy.gCaptivePortalService.UNLOCKED_PORTAL &&
 | |
|           (!this._aggregator || this._aggregator.aborted)
 | |
|         ) {
 | |
|           this._runNewAggregator();
 | |
|         }
 | |
|         break;
 | |
|     }
 | |
|   }
 | |
| }
 | 
