/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

"use strict";

// PersonalityProviderWorker.js imports the following scripts before this.
/* import-globals-from Tokenize.jsm */
/* import-globals-from NaiveBayesTextTagger.jsm */
/* import-globals-from NmfTextTagger.jsm */
/* import-globals-from RecipeExecutor.jsm */

// This script is loaded into a worker using importScripts, and in tests using
// import. `var` is used (instead of const/let) to avoid name collision errors
// when the declaration is evaluated in a scope that already has this name.
// eslint-disable-next-line no-var
var EXPORTED_SYMBOLS = ["PersonalityProviderWorker"];

/**
 * Helper that computes the SHA-256 digest of a file's contents.
 *
 * @param {string} filepath Path handed to IOUtils.read.
 * @returns {Promise<string>} The digest as a lowercase hexadecimal string,
 *   two characters per byte.
 */
async function _getFileHash(filepath) {
  // The bytes read from disk are passed straight to the digest.
  const data = await IOUtils.read(filepath);
  const digest = await crypto.subtle.digest("SHA-256", data);

  // Render each digest byte as a zero-padded, two-digit hexadecimal code.
  const toHex = byte => byte.toString(16).padStart(2, "0");
  return Array.from(new Uint8Array(digest), toHex).join("");
}

/**
 * V2 provider builds and ranks an interest profile (also called an “interest vector”) off the browse history.
 * This allows Firefox to classify pages into topics, by examining the text found on the page.
 * It does this by looking at the history text content, title, and description.
 */
const PersonalityProviderWorker = class PersonalityProviderWorker {
  /**
   * Resolves the "personality-provider" directory inside the local profile
   * directory. After the first call the method is replaced on the instance
   * with a version that returns the already computed path.
   *
   * @returns {Promise<string>} Path of the attachments directory.
   */
  async getPersonalityProviderDir() {
    const personalityProviderDir = PathUtils.join(
      await PathUtils.getLocalProfileDir(),
      "personality-provider"
    );

    // Cache this so we don't need to await again.
    this.getPersonalityProviderDir = () =>
      Promise.resolve(personalityProviderDir);
    return personalityProviderDir;
  }

  /**
   * Stores the URL prefix that attachment locations are appended to.
   *
   * @param {string} url Base URL used by _downloadAttachment.
   */
  setBaseAttachmentsURL(url) {
    this.baseAttachmentsURL = url;
  }

  /**
   * Stores the recipes read by createInterestVector and
   * calculateItemRelevanceScore.
   *
   * @param {object} interestConfig Object holding the recipe definitions.
   */
  setInterestConfig(interestConfig) {
    this.interestConfig = interestConfig;
  }

  /**
   * Stores the interest vector read by calculateItemRelevanceScore.
   *
   * @param {object} interestVector JSON-serializable interest vector.
   */
  setInterestVector(interestVector) {
    this.interestVector = interestVector;
  }

  /**
   * Reacts to a sync event: deletes the attachments of deleted records and of
   * the old version of updated records, then starts downloads for created
   * records and the new version of updated records.
   *
   * The work is started synchronously but not awaited. Failures are logged
   * rather than left as unhandled promise rejections.
   *
   * @param {object} event Sync event with `data.created`, `data.updated`
   *   (entries shaped `{ old, new }`) and `data.deleted` arrays.
   */
  onSync(event) {
    const {
      data: { created, updated, deleted },
    } = event;

    // Nothing waits for these operations, so attach a rejection handler to
    // each one; otherwise an I/O failure surfaces as an unhandled rejection.
    const runDetached = (pending, description) => {
      Promise.resolve(pending).catch(error =>
        console.error(`Failed to ${description}`, error)
      );
    };

    // Remove every removed attachment.
    const toRemove = deleted.concat(updated.map(u => u.old));
    toRemove.forEach(record =>
      runDetached(this.deleteAttachment(record), "delete attachment")
    );

    // Download every new/updated attachment.
    const toDownload = created.concat(updated.map(u => u.new));
    toDownload.forEach(record =>
      runDetached(this.maybeDownloadAttachment(record), "download attachment")
    );
  }

  /**
   * Attempts to download the attachment, but only if it doesn't already exist
   * locally with the expected size and hash.
   *
   * @param {object} record Record whose `attachment` has `filename`, `hash`
   *   and `size`.
   * @param {number} [retries=3] Maximum number of download attempts.
   */
  async maybeDownloadAttachment(record, retries = 3) {
    const {
      attachment: { filename, hash, size },
    } = record;
    const providerDir = await this.getPersonalityProviderDir();
    await IOUtils.makeDirectory(providerDir);
    const localFilePath = PathUtils.join(providerDir, filename);

    // Checks are ordered cheapest first and short-circuit: existence, then
    // size, and only then the content hash.
    const needsDownload = async () =>
      !(await IOUtils.exists(localFilePath)) ||
      (await IOUtils.stat(localFilePath)).size !== size ||
      (await _getFileHash(localFilePath)) !== hash;

    for (
      let attempt = 0;
      attempt < retries && (await needsDownload());
      attempt++
    ) {
      await this._downloadAttachment(record);
    }
  }

  /**
   * Downloads the attachment to disk assuming the dir already exists
   * and any existing files matching the filename are clobbered.
   * A non-200 response is logged and nothing is written.
   *
   * @param {object} record Record whose `attachment` has `location` and
   *   `filename`.
   */
  async _downloadAttachment(record) {
    const {
      attachment: { location, filename },
    } = record;
    const remoteFilePath = this.baseAttachmentsURL + location;
    const localFilePath = PathUtils.join(
      await this.getPersonalityProviderDir(),
      filename
    );

    const xhr = new XMLHttpRequest();
    // Set false here for a synchronous request, because we're in a worker.
    xhr.open("GET", remoteFilePath, false);
    xhr.setRequestHeader("Accept-Encoding", "gzip");
    xhr.responseType = "arraybuffer";
    xhr.withCredentials = false;
    xhr.send(null);

    if (xhr.status !== 200) {
      console.error(`Failed to fetch ${remoteFilePath}: ${xhr.statusText}`);
      return;
    }

    const bytes = new Uint8Array(xhr.response);

    await IOUtils.write(localFilePath, bytes, {
      tmpPath: `${localFilePath}.tmp`,
    });
  }

  /**
   * Removes the local file of an attachment, then tries to remove the
   * attachments directory itself.
   *
   * @param {object} record Record whose `attachment` has `filename`.
   */
  async deleteAttachment(record) {
    const {
      attachment: { filename },
    } = record;
    const providerDir = await this.getPersonalityProviderDir();
    await IOUtils.makeDirectory(providerDir);

    await IOUtils.remove(PathUtils.join(providerDir, filename), {
      ignoreAbsent: true,
    });

    // Cleanup the directory if it is empty, do nothing if it is not empty.
    try {
      await IOUtils.remove(providerDir, {
        ignoreAbsent: true,
      });
    } catch (e) {
      // This is likely because the directory is not empty, so we don't care.
    }
  }

  /**
   * Gets contents of the attachment if it already exists on file,
   * and if not attempts to download it.
   *
   * @param {object} record Record whose `attachment` has `filename`.
   * @returns {Promise<object>} The parsed JSON file, or `{}` when the
   *   download or read fails (the failure is logged).
   */
  async getAttachment(record) {
    const {
      attachment: { filename },
    } = record;
    const filepath = PathUtils.join(
      await this.getPersonalityProviderDir(),
      filename
    );

    try {
      await this.maybeDownloadAttachment(record);
      return await IOUtils.readJSON(filepath);
    } catch (error) {
      console.error(`Failed to load ${filepath}: ${error.message}`);
    }
    return {};
  }

  /**
   * Loads the attachment of every given record into `this.models`, tagging
   * each loaded model with the `key` of the record it came from.
   *
   * @param {object[]} models Records to load.
   * @returns {Promise<{ok: boolean}>} `ok` is false only when the resulting
   *   model list is empty.
   */
  async fetchModels(models) {
    this.models = await Promise.all(
      models.map(async record => ({
        ...(await this.getAttachment(record)),
        recordKey: record.key,
      }))
    );

    return { ok: !!this.models.length };
  }

  /**
   * Builds `this.taggers` from the loaded models, unless taggers already
   * exist. Models of type "nb" become NaiveBayesTextTagger instances; models
   * of type "nmf" become NmfTextTagger instances keyed by their `parent_tag`.
   *
   * @param {string[]} modelKeys Record keys of the models to use; models with
   *   other keys are skipped.
   */
  generateTaggers(modelKeys) {
    if (this.taggers) {
      return;
    }

    const nbTaggers = [];
    const nmfTaggers = {};

    for (const model of this.models) {
      if (!modelKeys.includes(model.recordKey)) {
        continue;
      }
      if (model.model_type === "nb") {
        nbTaggers.push(new NaiveBayesTextTagger(model, toksToTfIdfVector));
      } else if (model.model_type === "nmf") {
        nmfTaggers[model.parent_tag] = new NmfTextTagger(
          model,
          toksToTfIdfVector
        );
      }
    }

    this.taggers = { nbTaggers, nmfTaggers };
  }

  /**
   * Sets and generates a Recipe Executor.
   * A Recipe Executor is a set of actions that can be consumed by a Recipe.
   * The Recipe determines the order and specifics of which the actions are called.
   * Reads `this.taggers`, so generateTaggers must have run first.
   */
  generateRecipeExecutor() {
    this.recipeExecutor = new RecipeExecutor(
      this.taggers.nbTaggers,
      this.taggers.nmfTaggers,
      tokenize
    );
  }

  /**
   * Examines the user's browse history and returns an interest vector that
   * describes the topics the user frequently browses.
   *
   * @param {Iterable<object>} history History records to run through the
   *   `history_item_builder` recipe.
   * @returns {{ok: boolean, interestVector: object}|null} The finalized
   *   vector, or null as soon as the combiner recipe returns null.
   */
  createInterestVector(history) {
    let interestVector = {};

    for (const historyRec of history) {
      const ivItem = this.recipeExecutor.executeRecipe(
        historyRec,
        this.interestConfig.history_item_builder
      );
      // Records the builder recipe rejects are skipped.
      if (ivItem === null) {
        continue;
      }
      interestVector = this.recipeExecutor.executeCombinerRecipe(
        interestVector,
        ivItem,
        this.interestConfig.interest_combiner
      );
      if (interestVector === null) {
        return null;
      }
    }

    const finalResult = this.recipeExecutor.executeRecipe(
      interestVector,
      this.interestConfig.interest_finalizer
    );

    return {
      ok: true,
      interestVector: finalResult,
    };
  }

  /**
   * Ranks a Pocket item against the user's interest vector. Assumes
   * this.interestVector is populated.
   *
   * @param {object} pocketItem Item to rank. When it carries a non-empty
   *   `personalization_models` object, that is used as the item's tags
   *   instead of running the `item_to_rank_builder` recipe.
   * @returns {{scorableItem: object, rankingVector: object}|null} The item
   *   that was scored and the result of the `item_ranker` recipe, or null
   *   when either recipe returns null.
   */
  calculateItemRelevanceScore(pocketItem) {
    const { personalization_models } = pocketItem;
    let scorableItem;

    // If the server provides some models, we can just use them,
    // and skip generating them.
    if (personalization_models && Object.keys(personalization_models).length) {
      scorableItem = {
        id: pocketItem.id,
        item_tags: personalization_models,
        item_score: pocketItem.item_score,
        item_sort_id: 1,
      };
    } else {
      scorableItem = this.recipeExecutor.executeRecipe(
        pocketItem,
        this.interestConfig.item_to_rank_builder
      );
      if (scorableItem === null) {
        return null;
      }
    }

    // Deep copy so the stored interest vector is never mutated.
    let rankingVector = JSON.parse(JSON.stringify(this.interestVector));

    for (const key of Object.keys(scorableItem)) {
      rankingVector[key] = scorableItem[key];
    }

    rankingVector = this.recipeExecutor.executeRecipe(
      rankingVector,
      this.interestConfig.item_ranker
    );

    if (rankingVector === null) {
      return null;
    }

    return { scorableItem, rankingVector };
  }
};