Bug 1861516 - Remove Translations Remote Settings language-identification infrastructure r=gregtatum

Removes the infrastructure to download language-identification
dependencies for Translations via Remote Settings. This was used
only for the fastText LanguageIdEngine code. The current
language detector, CLD2, lives in tree and does not require
Remote Settings dependencies.

Depends on D192657

Differential Revision: https://phabricator.services.mozilla.com/D192658
This commit is contained in:
Erik Nordin 2023-11-09 04:59:04 +00:00
parent bd90c5a1d9
commit ba7544062d
8 changed files with 7 additions and 353 deletions

View file

@ -117,13 +117,8 @@ add_task(async function test_about_preferences_manage_languages() {
"All models were downloaded."
);
Assert.deepEqual(
await remoteClients.languageIdModels.resolvePendingDownloads(1),
["lid.176.ftz"],
"Language ID model was downloaded."
);
Assert.deepEqual(
await remoteClients.translationsWasm.resolvePendingDownloads(2),
["bergamot-translator", "fasttext-wasm"],
await remoteClients.translationsWasm.resolvePendingDownloads(1),
["bergamot-translator"],
"Wasm was downloaded."
);
@ -159,7 +154,6 @@ add_task(async function test_about_preferences_manage_languages() {
);
remoteClients.translationsWasm.assertNoNewDownloads();
remoteClients.languageIdModels.assertNoNewDownloads();
await assertVisibility({
message: "Everything is downloaded again.",

View file

@ -129,13 +129,11 @@ const VERIFY_SIGNATURES_FROM_FS = false;
/**
* @typedef {import("../translations").TranslationModelRecord} TranslationModelRecord
* @typedef {import("../translations").RemoteSettingsClient} RemoteSettingsClient
* @typedef {import("../translations").LanguageIdEngineMockedPayload} LanguageIdEngineMockedPayload
* @typedef {import("../translations").LanguageTranslationModelFiles} LanguageTranslationModelFiles
* @typedef {import("../translations").WasmRecord} WasmRecord
* @typedef {import("../translations").LangTags} LangTags
* @typedef {import("../translations").LanguagePair} LanguagePair
* @typedef {import("../translations").SupportedLanguages} SupportedLanguages
* @typedef {import("../translations").LanguageIdModelRecord} LanguageIdModelRecord
* @typedef {import("../translations").TranslationErrors} TranslationErrors
*/
@ -221,13 +219,6 @@ export class TranslationsParent extends JSWindowActorParent {
}
}
/**
 * The remote settings client that retrieves the language-identification model binary.
 *
 * Stays `null` until #getLanguageIdModelRemoteClient() creates it on first use, or
 * until mockLanguageIdentification() stores a client supplied by the caller.
 *
 * @type {RemoteSettingsClient | null}
 */
static #languageIdModelsRemoteClient = null;
/**
* A map of the TranslationModelRecord["id"] to the record of the model in Remote Settings.
* Used to coordinate the downloads.
@ -264,22 +255,6 @@ export class TranslationsParent extends JSWindowActorParent {
*/
static #isTranslationsEngineMocked = false;
/**
 * The language tag handed out as `mockedLangTag` in the response to the
 * "Translations:GetLanguageIdEnginePayload" message. It is `null` unless
 * mockLanguageIdentification() has stored a value.
 *
 * @type {string | null}
 */
static #mockedLangTag = null;
/**
 * The confidence score handed out as `mockedConfidence` in the response to the
 * "Translations:GetLanguageIdEnginePayload" message. It is `null` unless
 * mockLanguageIdentification() has stored a value.
 *
 * @type {number | null}
 */
static #mockedLanguageIdConfidence = null;
/**
* @type {null | Promise<boolean>}
*/
@ -778,18 +753,6 @@ export class TranslationsParent extends JSWindowActorParent {
async receiveMessage({ name, data }) {
switch (name) {
case "Translations:GetLanguageIdEnginePayload": {
const [modelBuffer, wasmBuffer] = await Promise.all([
TranslationsParent.#getLanguageIdModelArrayBuffer(),
TranslationsParent.#getLanguageIdWasmArrayBuffer(),
]);
return {
modelBuffer,
wasmBuffer,
mockedConfidence: TranslationsParent.#mockedLanguageIdConfidence,
mockedLangTag: TranslationsParent.#mockedLangTag,
};
}
case "Translations:ReportLangTags": {
const { documentElementLang, href } = data;
const detectedLanguages = await this.getDetectedLanguages(
@ -956,152 +919,6 @@ export class TranslationsParent extends JSWindowActorParent {
return TranslationsParent.shouldAlwaysTranslateLanguage(langTags);
}
/**
 * Cached promise for the language-identification model record. It is created
 * lazily by #getLanguageIdModelArrayBuffer() and set back to `null` there when
 * the download fails, so that a later call looks the record up again.
 *
 * @type {Promise<LanguageIdModelRecord> | null}
 */
static #languageIdModelRecord = null;
/**
* Retrieves the language-identification model binary from remote settings.
*
* @returns {Promise<ArrayBuffer>}
*/
static async #getLanguageIdModelArrayBuffer() {
lazy.console.log("Getting language-identification model array buffer.");
const now = Date.now();
const client = TranslationsParent.#getLanguageIdModelRemoteClient();
if (!TranslationsParent.#languageIdModelRecord) {
// Place the records into a promise to prevent any races.
TranslationsParent.#languageIdModelRecord = (async () => {
/** @type {LanguageIdModelRecord[]} */
let modelRecords = await TranslationsParent.getMaxVersionRecords(
client
);
if (modelRecords.length === 0) {
throw new Error(
"Unable to get language-identification model record from remote settings"
);
}
if (modelRecords.length > 1) {
TranslationsParent.reportError(
new Error(
"Expected the language-identification model collection to have only 1 record."
),
modelRecords
);
}
return modelRecords[0];
})();
}
await chaosMode(1 / 3);
try {
/** @type {{buffer: ArrayBuffer}} */
const { buffer } = await client.attachments.download(
await TranslationsParent.#languageIdModelRecord
);
const duration = (Date.now() - now) / 1000;
lazy.console.log(
`Remote language-identification model loaded in ${duration} seconds.`
);
return buffer;
} catch (error) {
TranslationsParent.#languageIdModelRecord = null;
throw error;
}
}
/**
* Initializes the RemoteSettingsClient for the language-identification model binary.
*
* @returns {RemoteSettingsClient}
*/
static #getLanguageIdModelRemoteClient() {
if (TranslationsParent.#languageIdModelsRemoteClient) {
return TranslationsParent.#languageIdModelsRemoteClient;
}
/** @type {RemoteSettingsClient} */
const client = lazy.RemoteSettings("translations-identification-models");
TranslationsParent.#languageIdModelsRemoteClient = client;
return client;
}
/**
 * Cached promise for the "fasttext-wasm" record. It is created lazily by
 * #getLanguageIdWasmArrayBuffer(), which resolves it with an element of a
 * WasmRecord[] list, and is set back to `null` there when the download fails.
 *
 * @type {Promise<WasmRecord> | null}
 */
static #languageIdWasmRecord = null;
/**
* Retrieves the language-identification wasm binary from remote settings.
*
* @returns {Promise<ArrayBuffer>}
*/
static async #getLanguageIdWasmArrayBuffer() {
const start = Date.now();
const client = TranslationsParent.#getTranslationsWasmRemoteClient();
// Load the wasm binary from remote settings, if it hasn't been already.
lazy.console.log(`Getting remote language-identification wasm binary.`);
if (!TranslationsParent.#languageIdWasmRecord) {
// Place the records into a promise to prevent any races.
TranslationsParent.#languageIdWasmRecord = (async () => {
/** @type {WasmRecord[]} */
let wasmRecords = await TranslationsParent.getMaxVersionRecords(
client,
{
filters: { name: "fasttext-wasm" },
}
);
if (wasmRecords.length === 0) {
// The remote settings client provides an empty list of records when there is
// an error.
throw new Error(
'Unable to get "fasttext-wasm" language-identification wasm binary from Remote Settings.'
);
}
if (wasmRecords.length > 1) {
TranslationsParent.reportError(
new Error(
'Expected the "fasttext-wasm" language-identification wasm collection to only have 1 record.'
),
wasmRecords
);
}
return wasmRecords[0];
})();
}
try {
// Unlike the models, greedily download the wasm. It will pull it from a locale
// cache on disk if it's already been downloaded. Do not retain a copy, as
// this will be running in the parent process. It's not worth holding onto
// this much memory, so reload it every time it is needed.
await chaosMode(1 / 3);
/** @type {{buffer: ArrayBuffer}} */
const { buffer } = await client.attachments.download(
await TranslationsParent.#languageIdWasmRecord
);
const duration = (Date.now() - start) / 1000;
lazy.console.log(
`Remote language-identification wasm binary loaded in ${duration} seconds.`
);
return buffer;
} catch (error) {
TranslationsParent.#languageIdWasmRecord = null;
throw error;
}
}
/**
* Creates a lookup key that is unique to each fromLanguage-toLanguage pair.
*
@ -1321,7 +1138,7 @@ export class TranslationsParent extends JSWindowActorParent {
* This function should take a record as input and return a string that represents the lookup key for the record.
* For most record types, the name (default) is sufficient, however if a collection contains records with
* non-unique name values, it may be necessary to provide an alternative function here.
* @returns {Array<TranslationModelRecord | LanguageIdModelRecord | WasmRecord>}
* @returns {Array<TranslationModelRecord | WasmRecord>}
*/
static async getMaxVersionRecords(
remoteSettingsClient,
@ -1688,12 +1505,6 @@ export class TranslationsParent extends JSWindowActorParent {
queue.push({
download: () => TranslationsParent.#getBergamotWasmArrayBuffer(),
});
queue.push({
download: () => TranslationsParent.#getLanguageIdModelArrayBuffer(),
});
queue.push({
download: () => TranslationsParent.#getLanguageIdWasmArrayBuffer(),
});
return downloadManager(queue);
}
@ -1944,13 +1755,10 @@ export class TranslationsParent extends JSWindowActorParent {
// Records.
TranslationsParent.#bergamotWasmRecord = null;
TranslationsParent.#translationModelRecords = null;
TranslationsParent.#languageIdModelRecord = null;
TranslationsParent.#languageIdWasmRecord = null;
// Clients.
TranslationsParent.#translationModelsRemoteClient = null;
TranslationsParent.#translationsWasmRemoteClient = null;
TranslationsParent.#languageIdModelsRemoteClient = null;
// Derived data.
TranslationsParent.#preferredLanguages = null;
@ -1974,33 +1782,6 @@ export class TranslationsParent extends JSWindowActorParent {
TranslationsParent.#isTranslationsEngineMocked = false;
}
/**
 * For testing purposes, allow the LanguageIdEngine to be mocked. The three
 * arguments are stored as-is on the class: the tag and confidence become the
 * mocked detection result, and the client replaces the cached
 * language-identification models remote client.
 *
 * If called with `null` in each argument, the mock is removed.
 *
 * @param {string | null} langTag - The BCP 47 language tag.
 * @param {number | null} confidence - The confidence score of the detected language.
 * @param {RemoteSettingsClient | null} client - The client to use for the
 *   language-identification models instead of the real one.
 */
static mockLanguageIdentification(langTag, confidence, client) {
lazy.console.log("Mocking language identification.", {
langTag,
confidence,
});
TranslationsParent.#mockedLangTag = langTag;
TranslationsParent.#mockedLanguageIdConfidence = confidence;
TranslationsParent.#languageIdModelsRemoteClient = client;
}
/**
 * Removes the language-identification mocks by resetting the mocked language
 * tag and the mocked confidence to `null`.
 *
 * The remote client stored by mockLanguageIdentification() is NOT reset here.
 * Make sure to call clearCache afterwards to remove the cached values.
 * NOTE(review): presumably clearCache is what resets the mocked client — confirm.
 */
static unmockLanguageIdentification() {
lazy.console.log("Removing language identification mock.");
TranslationsParent.#mockedLangTag = null;
TranslationsParent.#mockedLanguageIdConfidence = null;
}
/**
* Report an error. Having this as a method allows tests to check that an error
* was properly reported.
@ -2151,7 +1932,7 @@ export class TranslationsParent extends JSWindowActorParent {
async queryIdentifyLanguage() {
if (
TranslationsParent.isInAutomation() &&
!TranslationsParent.#mockedLangTag
!TranslationsParent.#isTranslationsEngineMocked
) {
return null;
}

View file

@ -173,8 +173,6 @@ add_task(async function test_about_translations_html() {
add_task(async function test_about_translations_language_identification() {
await openAboutTranslations({
detectedLangTag: "en",
detectedLanguageConfidence: "0.98",
languagePairs: [
{ fromLang: "en", toLang: "fr" },
{ fromLang: "fr", toLang: "en" },

View file

@ -4,11 +4,9 @@
"use strict";
add_task(async function test_detected_language() {
const detectedLangTag = "en";
const { cleanup, tab } = await loadTestPage({
// This page will get its language changed by the test.
page: ENGLISH_PAGE_URL,
detectedLangTag,
autoDownloadFromRemoteSettings: true,
languagePairs: [
// Spanish
@ -77,7 +75,7 @@ add_task(async function test_detected_language() {
Assert.deepEqual(
await getDetectedLanguagesFor("gibberish"),
{
docLangTag: detectedLangTag,
docLangTag: "en",
userLangTag: null,
isDocLangTagSupported: true,
},

View file

@ -8,12 +8,9 @@
* issues.
*/
add_task(async function test_detected_language() {
const detectedLangTag = "en";
const { cleanup, tab } = await loadTestPage({
// This page will get its language changed by the test.
page: ENGLISH_PAGE_URL,
detectedLangTag,
autoDownloadFromRemoteSettings: true,
// Empty out the accept languages.
languagePairs: [

View file

@ -98,9 +98,6 @@ add_task(async function test_about_translations_enabled() {
add_task(async function test_language_identification_for_page_translation() {
await autoTranslatePage({
page: NO_LANGUAGE_URL,
detectedLangTag: "es",
detectedLanguageConfidence: 0.95,
resolveLanguageIdDownloads: true,
languagePairs: [
{ fromLang: "es", toLang: "en" },
{ fromLang: "en", toLang: "es" },

View file

@ -60,14 +60,6 @@ const NEVER_TRANSLATE_LANGS_PREF =
* @param {boolean} [options.disabled]
* Disable the panel through a pref.
*
* @param {number} detectedLanguageConfidence
* This is the value for the MockedLanguageIdEngine to give as a confidence score for
* the mocked detected language.
*
* @param {string} detectedLangTag
* This is the BCP 47 language tag for the MockedLanguageIdEngine to return as
* the mocked detected language.
*
* @param {Array<{ fromLang: string, toLang: string }>} options.languagePairs
* The translation languages pairs to mock for the test.
*
@ -78,8 +70,6 @@ async function openAboutTranslations({
dataForContent,
disabled,
runInPage,
detectedLanguageConfidence,
detectedLangTag,
languagePairs = LANGUAGE_PAIRS,
prefs,
}) {
@ -118,8 +108,6 @@ async function openAboutTranslations({
// TODO(Bug 1814168) - Do not test download behavior as this is not robustly
// handled for about:translations yet.
autoDownloadFromRemoteSettings: true,
detectedLangTag,
detectedLanguageConfidence,
});
// Now load the about:translations page, since the actor could be mocked.
@ -129,10 +117,7 @@ async function openAboutTranslations({
);
await BrowserTestUtils.browserLoaded(tab.linkedBrowser);
// Resolve the files.
await remoteClients.languageIdModels.resolvePendingDownloads(1);
// The language id and translation engine each have a wasm file, so expect 2 downloads.
await remoteClients.translationsWasm.resolvePendingDownloads(2);
await remoteClients.translationsWasm.resolvePendingDownloads(1);
await remoteClients.translationModels.resolvePendingDownloads(
languagePairs.length * FILES_PER_LANGUAGE_PAIR
);
@ -404,8 +389,6 @@ async function closeTranslationsPanelIfOpen() {
async function setupActorTest({
languagePairs,
prefs,
detectedLanguageConfidence,
detectedLangTag,
autoDownloadFromRemoteSettings = false,
}) {
await SpecialPowers.pushPrefEnv({
@ -419,8 +402,6 @@ async function setupActorTest({
const { remoteClients, removeMocks } = await createAndMockRemoteSettings({
languagePairs,
detectedLangTag,
detectedLanguageConfidence,
autoDownloadFromRemoteSettings,
});
@ -448,8 +429,6 @@ async function setupActorTest({
async function createAndMockRemoteSettings({
languagePairs = LANGUAGE_PAIRS,
detectedLanguageConfidence = 0.5,
detectedLangTag = "en",
autoDownloadFromRemoteSettings = false,
}) {
const remoteClients = {
@ -460,9 +439,6 @@ async function createAndMockRemoteSettings({
translationsWasm: await createTranslationsWasmRemoteClient(
autoDownloadFromRemoteSettings
),
languageIdModels: await createLanguageIdModelsRemoteClient(
autoDownloadFromRemoteSettings
),
};
// The TranslationsParent will pull the language pair values from the JSON dump
@ -474,23 +450,13 @@ async function createAndMockRemoteSettings({
remoteClients.translationsWasm.client
);
TranslationsParent.mockLanguageIdentification(
detectedLangTag,
detectedLanguageConfidence,
remoteClients.languageIdModels.client
);
return {
async removeMocks() {
await remoteClients.translationModels.client.attachments.deleteAll();
await remoteClients.translationsWasm.client.attachments.deleteAll();
await remoteClients.languageIdModels.client.attachments.deleteAll();
await remoteClients.translationModels.client.db.clear();
await remoteClients.translationsWasm.client.db.clear();
await remoteClients.languageIdModels.client.db.clear();
TranslationsParent.unmockTranslationsEngine();
TranslationsParent.unmockLanguageIdentification();
TranslationsParent.clearCache();
},
remoteClients,
@ -500,8 +466,6 @@ async function createAndMockRemoteSettings({
async function loadTestPage({
languagePairs,
autoDownloadFromRemoteSettings = false,
detectedLanguageConfidence,
detectedLangTag,
page,
prefs,
autoOffer,
@ -542,8 +506,6 @@ async function loadTestPage({
const { remoteClients, removeMocks } = await createAndMockRemoteSettings({
languagePairs,
detectedLanguageConfidence,
detectedLangTag,
autoDownloadFromRemoteSettings,
});
@ -582,11 +544,6 @@ async function loadTestPage({
);
},
async resolveLanguageIdDownloads() {
await remoteClients.translationsWasm.resolvePendingDownloads(1);
await remoteClients.languageIdModels.resolvePendingDownloads(1);
},
/**
* @returns {Promise<void>}
*/
@ -860,7 +817,7 @@ async function createTranslationModelsRemoteClient(
async function createTranslationsWasmRemoteClient(
autoDownloadFromRemoteSettings
) {
const records = ["bergamot-translator", "fasttext-wasm"].map(name => ({
const records = ["bergamot-translator"].map(name => ({
id: crypto.randomUUID(),
name,
version: "1.0",
@ -886,43 +843,6 @@ async function createTranslationsWasmRemoteClient(
);
}
/**
 * Builds a local Remote Settings client for tests, seeded with a single
 * "lid.176.ftz" language-identification model record, and wraps it with
 * createAttachmentMock().
 *
 * Each call creates a client with a unique collection name by appending the
 * running `_remoteSettingsMockId` counter.
 *
 * @param {boolean} autoDownloadFromRemoteSettings
 *   Forwarded unchanged to createAttachmentMock().
 * @returns {Promise<object>}
 *   Whatever createAttachmentMock() returns for the new client.
 *   NOTE(review): callers read `.client` from the result, so this looks like a
 *   wrapper around the RemoteSettingsClient rather than the client itself — confirm.
 */
async function createLanguageIdModelsRemoteClient(
  autoDownloadFromRemoteSettings
) {
  const modelRecord = {
    id: crypto.randomUUID(),
    name: "lid.176.ftz",
    version: "1.0",
    last_modified: Date.now(),
    schema: Date.now(),
  };

  const remoteSettingsModule = ChromeUtils.importESModule(
    "resource://services-settings/remote-settings.sys.mjs"
  );

  const mockedCollectionName = "test-language-id-models";
  const client = remoteSettingsModule.RemoteSettings(
    mockedCollectionName + _remoteSettingsMockId++
  );

  // Start from an empty local database, then import the single mocked record.
  await client.db.clear();
  await client.db.importChanges({}, Date.now(), [modelRecord]);

  return createAttachmentMock(
    client,
    mockedCollectionName,
    autoDownloadFromRemoteSettings
  );
}
async function selectAboutPreferencesElements() {
const document = gBrowser.selectedBrowser.contentDocument;

View file

@ -24,25 +24,6 @@ export interface Attachment {
mimetype: string;
}
/**
 * The JSON that is synced from Remote Settings for the language-id models.
 * Each record describes one model file and carries that file as an attachment.
 */
export interface LanguageIdModelRecord {
// Unique identifier of the record, e.g. "0d4db293-a17c-4085-9bd8-e2e146c85000"
id: string;
// The full model name, e.g. "lid.176.ftz"
name: string;
// The semver number, used for handling future format changes. e.g. 1.0
version: string;
// The file attachment for this record
attachment: Attachment;
// Last-modified value of the record, e.g. 1673455932527
// NOTE(review): the example looks numeric while the declared type is string — confirm.
last_modified: string;
// A JEXL expression to determine whether this record should be pulled from Remote Settings
// See: https://remote-settings.readthedocs.io/en/latest/target-filters.html#filter-expressions
filter_expression: string;
}
/**
* The JSON that is synced from Remote Settings for the translation models.
*/
@ -261,18 +242,6 @@ interface TranslationsEnginePayload {
isMocked: boolean,
}
/**
 * These are the files that are downloaded from Remote Settings that are necessary
 * to start the language-identification engine. These may not be available if running
 * in tests. The payload also carries the mocked detection values, which are `null`
 * when no mock is in place.
 */
interface LanguageIdEnginePayload {
// The bytes of the language-identification wasm binary.
wasmBuffer: ArrayBuffer,
// The bytes of the language-identification model.
modelBuffer: ArrayBuffer,
// The confidence score to report when language identification is mocked, otherwise null.
mockedConfidence: null | number,
// The language tag to report when language identification is mocked, otherwise null.
mockedLangTag: null | string,
}
/**
* Nodes that are being translated are given priority according to their visibility.
*/