forked from mirrors/gecko-dev
		
	Differential Revision: https://phabricator.services.mozilla.com/D59933 --HG-- extra : moz-landing-system : lando
		
			
				
	
	
		
			390 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			390 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 | 
						|
"use strict";
 | 
						|
 | 
						|
const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
 | 
						|
 | 
						|
ChromeUtils.defineModuleGetter(
 | 
						|
  this,
 | 
						|
  "PlacesUtils",
 | 
						|
  "resource://gre/modules/PlacesUtils.jsm"
 | 
						|
);
 | 
						|
 | 
						|
// Default time segments used to bucket browsing history.
// - startTime / endTime are ages in seconds before now; queryVisits scales
//   them to microseconds and negates them to build offsets relative to now.
//   A startTime of 0 makes the query reach back to the Unix epoch, an endTime
//   of 0 means "now".
// - weightPosition is the x position passed to calculateScore (with a maximum
//   of 1) to derive the recency weight of the segment.
const DEFAULT_TIME_SEGMENTS = [
  { id: "hour", startTime: 3600, endTime: 0, weightPosition: 1 },
  { id: "day", startTime: 86400, endTime: 3600, weightPosition: 0.75 },
  { id: "week", startTime: 604800, endTime: 86400, weightPosition: 0.5 },
  { id: "weekPlus", startTime: 0, endTime: 604800, weightPosition: 0.25 },
  { id: "alltime", startTime: 0, endTime: 0, weightPosition: 0.25 },
];

// Default parameter sets, keyed by parameter set id.
const DEFAULT_PARAMETER_SETS = {
  "linear-frequency": {
    // Curve factor used to turn a segment's weightPosition into its weight.
    recencyFactor: 0.4,
    // Curve factor applied to the weighted visit count of a domain.
    frequencyFactor: 0.5,
    // Curve factor applied to an item's boosted combined domain score.
    combinedDomainFactor: 0.5,
    // Weighted visit count at (or above) which a domain scores 1.
    perfectFrequencyVisits: 10,
    // Boosted combined domain score at (or above) which an item scores 1.
    perfectCombinedDomainScore: 2,
    // The combined score is multiplied by (1 + boost) ^ matchingDomainsCount.
    multiDomainBoost: 0.1,
    // 0 ignores the item's own score, 1 ignores the combined domain score.
    itemScoreFactor: 0,
  },
};

// Default cap applied to the number of results of each history query.
const DEFAULT_MAX_HISTORY_QUERY_RESULTS = 1000;
 | 
						|
 | 
						|
/**
 * Shallow-merges objects via Object.assign.
 *
 * The first argument is the merge target: it is mutated in place and
 * returned. Pass a fresh `{}` first to avoid modifying existing objects.
 *
 * @param {...Object} args The target followed by any number of sources
 * @return {Object} The (mutated) first argument
 */
function merge(...args) {
  return Object.assign(...args);
}
 | 
						|
 | 
						|
/**
 * Provides functionality to personalize content recommendations by calculating
 * user domain affinity scores. These scores are used to calculate relevance
 * scores for items/recs/stories that have domain affinities.
 *
 * The algorithm works as follows:
 *
 * - The recommendation endpoint returns a settings object containing
 * timeSegments and parameterSets.
 *
 * - For every time segment we calculate the corresponding domain visit counts,
 * yielding result objects of the following structure: {"mozilla.org": 12,
 * "mozilla.com": 34} (see UserDomainAffinityProvider#queryVisits)
 *
 * - These visit counts are transformed to domain affinity scores for all
 * provided parameter sets: {"mozilla.org": {"paramSet1": 0.8,
 * "paramSet2": 0.9}, "mozilla.com": {"paramSet1": 1, "paramSet2": 0.9}}
 * (see UserDomainAffinityProvider#calculateScoresForParameterSets)
 *
 * - The parameter sets provide factors for weighting which allows for
 * flexible targeting. The functionality to calculate final scores can
 * be seen in UserDomainAffinityProvider#calculateScore
 *
 * - The user domain affinity scores are summed up across all time segments
 * see UserDomainAffinityProvider#calculateAllUserDomainAffinityScores
 *
 * - An item's domain affinities are matched to the user's domain affinity
 * scores by calculating an item relevance score
 * (see UserDomainAffinityProvider#calculateItemRelevanceScore)
 *
 * - The item relevance scores are used to sort items (see TopStoriesFeed for
 * more details)
 *
 * - The data structure was chosen to allow for fast cache lookups during
 * relevance score calculation. While user domain affinities are calculated
 * infrequently (i.e. only once a day), the item relevance score (potentially)
 * needs to be calculated every time the feed updates. Therefore allowing cache
 * lookups of scores[domain][parameterSet] is beneficial
 */
this.UserDomainAffinityProvider = class UserDomainAffinityProvider {
  /**
   * @param {Array} timeSegments Time segments to query history for, each of
   * the form {id, startTime, endTime, weightPosition}.
   * @param {Object} parameterSets Parameter sets keyed by id. When `scores`
   * is provided they are stored as-is (presumably they already carry
   * timeSegmentWeights from an earlier getAffinities() call - confirm against
   * callers); otherwise they are passed through prepareParameterSets.
   * @param {number} maxHistoryQueryResults Cap for each history query.
   * @param version Opaque value that is stored and returned by getAffinities.
   * @param {Object} scores Previously calculated scores. When omitted, the
   * scores are calculated from browsing history during construction.
   */
  constructor(
    timeSegments = DEFAULT_TIME_SEGMENTS,
    parameterSets = DEFAULT_PARAMETER_SETS,
    maxHistoryQueryResults = DEFAULT_MAX_HISTORY_QUERY_RESULTS,
    version,
    scores
  ) {
    this.timeSegments = timeSegments;
    this.maxHistoryQueryResults = maxHistoryQueryResults;
    this.version = version;
    if (scores) {
      this.parameterSets = parameterSets;
      this.scores = scores;
    } else {
      this.parameterSets = this.prepareParameterSets(parameterSets);
      this.scores = this.calculateAllUserDomainAffinityScores();
    }
  }

  /**
   * Adds dynamic parameters to the given parameter sets that need to be
   * computed based on time segments.
   *
   * The given parameter sets are not modified; every set is copied into a new
   * object before the computed fields are added. An empty input yields {}.
   *
   * @param ps The parameter sets
   * @return New parameter sets with additional fields (i.e. timeSegmentWeights)
   */
  prepareParameterSets(ps) {
    return Object.keys(ps).reduce(
      (acc, k) =>
        merge(acc, {
          // Copy into a fresh object so that neither the caller's parameter
          // sets nor the shared module-level defaults are mutated.
          // Adds e.g. timeSegmentWeights: {"hour": 1, "day": 0.8915, ...}
          [k]: merge({}, ps[k], {
            timeSegmentWeights: this.calculateTimeSegmentWeights(
              ps[k].recencyFactor
            ),
          }),
        }),
      {}
    );
  }

  /**
   * Calculates a time segment weight based on the provided recencyFactor.
   *
   * @param recencyFactor The recency factor indicating how to weigh recency
   * @return An object containing time segment weights: {"hour": 0.987, "day": 1}
   */
  calculateTimeSegmentWeights(recencyFactor) {
    return this.timeSegments.reduce(
      (acc, cur) =>
        merge(acc, {
          [cur.id]: this.calculateScore(cur.weightPosition, 1, recencyFactor),
        }),
      {}
    );
  }

  /**
   * Calculates user domain affinity scores based on browsing history and the
   * available times segments and parameter sets.
   *
   * @return The combined scores across all time segments:
   * {"a.com": {"ps1": 0.2, "ps2": 0.3}, "b.com": ...}, or {} when there are
   * no time segments.
   */
  calculateAllUserDomainAffinityScores() {
    return (
      this.timeSegments
        // Calculate parameter set specific domain scores for each time segment
        // => [{"a.com": {"ps1": 12, "ps2": 34}, "b.com": {"ps1": 56, "ps2": 78}}, ...]
        .map(ts => this.calculateUserDomainAffinityScores(ts))
        // Keep format, but reduce to single object, with combined scores across all time segments
        // => {"a.com":{"ps1":2,"ps2":2}, "b.com":{"ps1":3,"ps2":3}}
        // The initial {} keeps this from throwing when there are no segments.
        .reduce((acc, cur) => this._combineScores(acc, cur), {})
    );
  }

  /**
   * Calculates the user domain affinity scores for the given time segment.
   *
   * @param ts The time segment
   * @return The parameter specific scores for all domains with visits in
   * this time segment: {"a.com": {"ps1": 12, "ps2": 34}, "b.com" ...}
   */
  calculateUserDomainAffinityScores(ts) {
    // Returns domains and visit counts for this time segment: {"a.com": 1, "b.com": 2}
    const visits = this.queryVisits(ts);

    return Object.keys(visits).reduce(
      (acc, d) =>
        merge(acc, {
          [d]: this.calculateScoresForParameterSets(ts, visits[d]),
        }),
      {}
    );
  }

  /**
   * Calculates the scores for all parameter sets for the given time segment
   * and domain visit count.
   *
   * @param ts The time segment
   * @param vc The domain visit count in the given time segment
   * @return The parameter specific scores for the visit count in
   * this time segment: {"ps1": 12, "ps2": 34}
   */
  calculateScoresForParameterSets(ts, vc) {
    return Object.keys(this.parameterSets).reduce(
      (acc, ps) =>
        merge(acc, {
          [ps]: this.calculateScoreForParameterSet(
            ts,
            vc,
            this.parameterSets[ps]
          ),
        }),
      {}
    );
  }

  /**
   * Calculates the final affinity score in the given time segment for the given parameter set
   *
   * @param timeSegment The time segment
   * @param visitCount The domain visit count in the given time segment
   * @param parameterSet The parameter set to use for scoring; must carry
   * timeSegmentWeights (see prepareParameterSets)
   * @return The final score
   */
  calculateScoreForParameterSet(timeSegment, visitCount, parameterSet) {
    // Weigh the visit count by the recency weight of its time segment first.
    const weightedVisitCount =
      visitCount * parameterSet.timeSegmentWeights[timeSegment.id];
    return this.calculateScore(
      weightedVisitCount,
      parameterSet.perfectFrequencyVisits,
      parameterSet.frequencyFactor
    );
  }

  /**
   * Keeps the same format, but reduces the two objects to a single object, with
   * combined scores across all time segments  => {"a.com":{"ps1":2,"ps2":2},
   * "b.com":{"ps1":3,"ps2":3}}
   *
   * @param a First scores object
   * @param b Second scores object
   * @return A new object holding the combined scores of all domains in a or b
   */
  _combineScores(a, b) {
    // Merge both score objects so we get a combined object holding all domains.
    // This is so we can combine them without missing domains that are in a and not in b and vice versa.
    const c = merge({}, a, b);
    return Object.keys(c).reduce(
      (acc, d) => merge(acc, this._combine(a, b, c, d)),
      {}
    );
  }

  /**
   * Sums up the parameter set specific scores of a single domain, capping
   * each sum at 1.
   *
   * @param a First scores object
   * @param b Second scores object
   * @param c Shallow merge of a and b; c[d] supplies the parameter set ids
   * @param d The domain to combine
   * @return {[d]: {"ps1": 0.5, "ps2": 1}}; the inner object is empty when
   * c[d] holds no parameter sets
   */
  _combine(a, b, c, d) {
    const combined = Object.keys(c[d]).reduce(
      (acc, ps) =>
        merge(acc, {
          // Missing (or otherwise falsy) scores count as 0.
          [ps]: Math.min(
            1,
            ((a[d] && a[d][ps]) || 0) + ((b[d] && b[d][ps]) || 0)
          ),
        }),
      {}
    );
    return { [d]: combined };
  }

  /**
   * Calculates a value on the curve described by the provided parameters. The curve we're using is
   * (a^(b*x) - 1) / (a^b - 1): https://www.desmos.com/calculator/maqhpttupp
   *
   * @param {number} score A value between 0 and maxScore, representing x.
   * @param {number} maxScore Highest possible score.
   * @param {number} factor The slope describing the curve to get to maxScore. A low slope value
   * [0, 0.5] results in a log-shaped curve, a high slope [0.5, 1] results in a exp-shaped curve,
   * a slope of exactly 0.5 is linear.
   * @param {number} ease Adjusts how much bend is in the curve i.e. how dramatic the maximum
   * effect of the slope can be. This represents b in the formula above.
   * @return {number} the final score; 1 as soon as score reaches maxScore
   */
  calculateScore(score, maxScore, factor, ease = 2) {
    let a = 0;
    // Normalize to x = score / maxScore, never going below 0.
    const x = Math.max(0, score / maxScore);

    if (x >= 1) {
      return 1;
    }

    if (factor === 0.5) {
      return x;
    }

    if (factor < 0.5) {
      // We want a log-shaped curve so we scale "a" between 0 and .49
      a = (factor / 0.5) * 0.49;
    } else if (factor > 0.5) {
      // We want an exp-shaped curve so we scale "a" between 1 and 10
      a = 1 + ((factor - 0.5) / 0.5) * 9;
    }

    return (Math.pow(a, ease * x) - 1) / (Math.pow(a, ease) - 1);
  }

  /**
   * Queries the visit counts in the given time segment.
   *
   * Hosts are taken from the result URIs with a leading "www." stripped, and
   * the access counts of all results sharing a host are added up.
   *
   * @param ts the time segment
   * @return the visit count object: {"a.com": 1, "b.com": 2}
   */
  queryVisits(ts) {
    const visitCounts = {};
    const query = PlacesUtils.history.getNewQuery();
    if (!query) {
      return visitCounts;
    }
    const wwwRegEx = /^www\./;

    // Segment boundaries are given in seconds; they are scaled by 1e6 and
    // negated to form offsets relative to now. Without a startTime the query
    // reaches back by Date.now() (in the same scale), i.e. to the epoch.
    query.beginTimeReference = query.TIME_RELATIVE_NOW;
    query.beginTime = ts.startTime
      ? -(ts.startTime * 1000 * 1000)
      : -(Date.now() * 1000);

    query.endTimeReference = query.TIME_RELATIVE_NOW;
    query.endTime = ts.endTime ? -(ts.endTime * 1000 * 1000) : 0;

    const options = PlacesUtils.history.getNewQueryOptions();
    options.sortingMode = options.SORT_BY_VISITCOUNT_DESCENDING;
    options.maxResults = this.maxHistoryQueryResults;

    const { root } = PlacesUtils.history.executeQuery(query, options);
    root.containerOpen = true;
    try {
      for (let i = 0; i < root.childCount; i++) {
        const node = root.getChild(i);
        const host = Services.io.newURI(node.uri).host.replace(wwwRegEx, "");
        if (!visitCounts[host]) {
          visitCounts[host] = 0;
        }
        // NOTE(review): accessCount may be the page's overall visit count
        // rather than the count within this time segment - confirm.
        visitCounts[host] += node.accessCount;
      }
    } finally {
      // Always close the container again, even if reading a node throws.
      root.containerOpen = false;
    }
    return visitCounts;
  }

  /**
   * Calculates an item's relevance score.
   *
   * @param item the item (story), must contain domain affinities, otherwise
   * its own item_score is returned.
   * @return the calculated item's score, or item.item_score if the item has no
   * domain_affinities or references an unknown parameter set.
   */
  calculateItemRelevanceScore(item) {
    const params = this.parameterSets[item.parameter_set];
    if (!item.domain_affinities || !params) {
      return item.item_score;
    }

    // Sum up (user affinity * item affinity) over all domains of the item for
    // which the user has a non-zero affinity score, counting those domains.
    let combinedDomainScore = 0;
    let matchingDomainsCount = 0;
    for (const d of Object.keys(item.domain_affinities)) {
      const userDomainAffinityScore = this.scores[d]
        ? this.scores[d][item.parameter_set]
        : false;
      if (userDomainAffinityScore) {
        combinedDomainScore +=
          userDomainAffinityScore * item.domain_affinities[d];
        matchingDomainsCount++;
      }
    }

    // Boost the score as configured in the provided parameter set
    const boostedCombinedDomainScore =
      combinedDomainScore *
      Math.pow(params.multiDomainBoost + 1, matchingDomainsCount);

    // Calculate what the score would be if the item score is ignored
    const normalizedCombinedDomainScore = this.calculateScore(
      boostedCombinedDomainScore,
      params.perfectCombinedDomainScore,
      params.combinedDomainFactor
    );

    // Calculate the final relevance score using the itemScoreFactor. The itemScoreFactor
    // allows weighting the item score in relation to the normalizedCombinedDomainScore:
    // An itemScoreFactor of 1 results in the item score and ignores the combined domain score
    // An itemScoreFactor of 0.5 results in the average of item score and combined domain score
    // An itemScoreFactor of 0 results in the combined domain score and ignores the item score
    return (
      params.itemScoreFactor *
        (item.item_score - normalizedCombinedDomainScore) +
      normalizedCombinedDomainScore
    );
  }

  /**
   * Returns an object holding the settings and affinity scores of this provider instance.
   * The fields mirror the constructor arguments, in the same order.
   */
  getAffinities() {
    return {
      timeSegments: this.timeSegments,
      parameterSets: this.parameterSets,
      maxHistoryQueryResults: this.maxHistoryQueryResults,
      version: this.version,
      scores: this.scores,
    };
  }
};
 | 
						|
 | 
						|
// Export list of this module (by the JSM EXPORTED_SYMBOLS convention): names
// listed here are what importers such as ChromeUtils.import receive.
const EXPORTED_SYMBOLS = ["UserDomainAffinityProvider"];
 |