Bug 1895888 - Vendor fully-implemented Rust relevancy component r=bdk

Differential Revision: https://phabricator.services.mozilla.com/D209964
This commit is contained in:
Nan Jiang 2024-05-09 19:56:58 +00:00
parent 2f9ab8fab6
commit eb54ba3346
27 changed files with 945 additions and 326 deletions

View file

@ -60,9 +60,9 @@ git = "https://github.com/mozilla-spidermonkey/jsparagus"
rev = "61f399c53a641ebd3077c1f39f054f6d396a633c" rev = "61f399c53a641ebd3077c1f39f054f6d396a633c"
replace-with = "vendored-sources" replace-with = "vendored-sources"
[source."git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434"] [source."git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a"]
git = "https://github.com/mozilla/application-services" git = "https://github.com/mozilla/application-services"
rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434" rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a"
replace-with = "vendored-sources" replace-with = "vendored-sources"
[source."git+https://github.com/mozilla/audioipc?rev=409e11f8de6288e9ddfe269654523735302e59e6"] [source."git+https://github.com/mozilla/audioipc?rev=409e11f8de6288e9ddfe269654523735302e59e6"]

31
Cargo.lock generated
View file

@ -1680,7 +1680,7 @@ dependencies = [
[[package]] [[package]]
name = "error-support" name = "error-support"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434" source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
dependencies = [ dependencies = [
"error-support-macros", "error-support-macros",
"lazy_static", "lazy_static",
@ -1692,7 +1692,7 @@ dependencies = [
[[package]] [[package]]
name = "error-support-macros" name = "error-support-macros"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434" source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -2965,7 +2965,7 @@ dependencies = [
[[package]] [[package]]
name = "interrupt-support" name = "interrupt-support"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434" source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
dependencies = [ dependencies = [
"lazy_static", "lazy_static",
"parking_lot", "parking_lot",
@ -4173,7 +4173,7 @@ dependencies = [
[[package]] [[package]]
name = "nss_build_common" name = "nss_build_common"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434" source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
[[package]] [[package]]
name = "nsstring" name = "nsstring"
@ -4827,14 +4827,19 @@ checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
[[package]] [[package]]
name = "relevancy" name = "relevancy"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434" source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
dependencies = [ dependencies = [
"anyhow",
"base64 0.21.3",
"error-support", "error-support",
"interrupt-support", "interrupt-support",
"log", "log",
"md-5", "md-5",
"parking_lot", "parking_lot",
"remote_settings",
"rusqlite", "rusqlite",
"serde",
"serde_json",
"sql-support", "sql-support",
"thiserror", "thiserror",
"uniffi", "uniffi",
@ -4844,7 +4849,7 @@ dependencies = [
[[package]] [[package]]
name = "remote_settings" name = "remote_settings"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434" source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
dependencies = [ dependencies = [
"parking_lot", "parking_lot",
"serde", "serde",
@ -5372,7 +5377,7 @@ dependencies = [
[[package]] [[package]]
name = "sql-support" name = "sql-support"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434" source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
dependencies = [ dependencies = [
"ffi-support", "ffi-support",
"interrupt-support", "interrupt-support",
@ -5554,7 +5559,7 @@ checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
[[package]] [[package]]
name = "suggest" name = "suggest"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434" source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"chrono", "chrono",
@ -5603,7 +5608,7 @@ dependencies = [
[[package]] [[package]]
name = "sync-guid" name = "sync-guid"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434" source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
dependencies = [ dependencies = [
"base64 0.21.3", "base64 0.21.3",
"rand", "rand",
@ -5614,7 +5619,7 @@ dependencies = [
[[package]] [[package]]
name = "sync15" name = "sync15"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434" source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"error-support", "error-support",
@ -5646,7 +5651,7 @@ dependencies = [
[[package]] [[package]]
name = "tabs" name = "tabs"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434" source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"error-support", "error-support",
@ -6319,7 +6324,7 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]] [[package]]
name = "viaduct" name = "viaduct"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434" source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
dependencies = [ dependencies = [
"ffi-support", "ffi-support",
"log", "log",
@ -6467,7 +6472,7 @@ dependencies = [
[[package]] [[package]]
name = "webext-storage" name = "webext-storage"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434" source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"error-support", "error-support",

View file

@ -223,14 +223,14 @@ warp = { git = "https://github.com/seanmonstar/warp", rev = "9d081461ae1167eb321
malloc_size_of_derive = { path = "xpcom/rust/malloc_size_of_derive" } malloc_size_of_derive = { path = "xpcom/rust/malloc_size_of_derive" }
# application-services overrides to make updating them all simpler. # application-services overrides to make updating them all simpler.
interrupt-support = { git = "https://github.com/mozilla/application-services", rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434" } interrupt-support = { git = "https://github.com/mozilla/application-services", rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a" }
relevancy = { git = "https://github.com/mozilla/application-services", rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434" } relevancy = { git = "https://github.com/mozilla/application-services", rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a" }
sql-support = { git = "https://github.com/mozilla/application-services", rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434" } sql-support = { git = "https://github.com/mozilla/application-services", rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a" }
suggest = { git = "https://github.com/mozilla/application-services", rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434" } suggest = { git = "https://github.com/mozilla/application-services", rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a" }
sync15 = { git = "https://github.com/mozilla/application-services", rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434" } sync15 = { git = "https://github.com/mozilla/application-services", rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a" }
tabs = { git = "https://github.com/mozilla/application-services", rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434" } tabs = { git = "https://github.com/mozilla/application-services", rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a" }
viaduct = { git = "https://github.com/mozilla/application-services", rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434" } viaduct = { git = "https://github.com/mozilla/application-services", rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a" }
webext-storage = { git = "https://github.com/mozilla/application-services", rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434" } webext-storage = { git = "https://github.com/mozilla/application-services", rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a" }
# Patch mio 0.8.8 to use windows-sys 0.52 (backport https://github.com/tokio-rs/mio/commit/eea9e3e0c469480e5c59c01e6c3c7e5fd88f0848) # Patch mio 0.8.8 to use windows-sys 0.52 (backport https://github.com/tokio-rs/mio/commit/eea9e3e0c469480e5c59c01e6c3c7e5fd88f0848)
mio_0_8 = { package = "mio", git = "https://github.com/glandium/mio", rev = "9a2ef335c366044ffe73b1c4acabe50a1daefe05" } mio_0_8 = { package = "mio", git = "https://github.com/glandium/mio", rev = "9a2ef335c366044ffe73b1c4acabe50a1daefe05" }

View file

@ -3047,3 +3047,5 @@ pref("startup.homepage_override_nimbus_maxVersion", "");
// Pref to enable the content relevancy feature. // Pref to enable the content relevancy feature.
pref("toolkit.contentRelevancy.enabled", false); pref("toolkit.contentRelevancy.enabled", false);
// Pref to enable the ingestion through the Rust component.
pref("toolkit.contentRelevancy.ingestEnabled", false);

View file

@ -1 +1 @@
{"files":{"Cargo.toml":"8b8d81c6af8ab402f8febf103e10917a55886cacb01d0448c4426a2b54d244d0","build.rs":"a562bfe527d21c4e8a1a44b892defa83cdff141ec5dd51ed6f3862330e50ddd7","src/bin/generate-test-data.rs":"7f1c9dc445418c7627f89d1f2aa8e550d0f85b3d1f05edb7c378ab9441714f1f","src/db.rs":"9470c4566fc6296571b35d493f752d8a1f3c1fd8f7f420007ee3fa3e762af92b","src/error.rs":"00a10d47c9cfd25c4104174ec07eca6a08103564cb1b2c4961739f17f2892fb2","src/interest.rs":"e28b51c9692905ca87e6ab23decf1c1b3897bf29cb3d0d61f71213553b561dcc","src/lib.rs":"a42ffd826fd38a5f9436d9de00fd7b548e233a39063fbc030cae10052e0b4253","src/populate_interests.rs":"96c825796c6cfb7b1bb3a11c6d1b9c3639107943f5d35a259e195fec15aeef4e","src/relevancy.udl":"3de62ea53b4f34c11ff94c782b8389d58525ca40bb292b4b81370025813def5e","src/schema.rs":"f782c712f10c4f1af2f9e1424d6b52f59a2bacfcc452a8feb763f36478f5dd5d","src/url_hash.rs":"5619a249d471e7b642d889bad09e93212559c8b947010d49492c1423da2b310e","test-data":"1ef2cd092d59e7e126cd4a514af983d449ed9f9c98708702fd237464a76c2b5e"},"package":null} 
{"files":{"Cargo.toml":"2b7bf33e20b6aa768dd18619845e9d5d22235d86f770e94b250ed0052662ce2d","build.rs":"a562bfe527d21c4e8a1a44b892defa83cdff141ec5dd51ed6f3862330e50ddd7","src/bin/generate-test-data.rs":"7f1c9dc445418c7627f89d1f2aa8e550d0f85b3d1f05edb7c378ab9441714f1f","src/db.rs":"7ca5688c42d44ad6e5320208257d131c5c744be47a1cfe3e1380147abf2aadc3","src/error.rs":"0fe48e211dffb2010f732672c38e1c79b1995df3e70b06398ed8ac43d326c1b1","src/ingest.rs":"d3f528c1d62b4b6af404bb14cb0d431f8d523911ada09e4e1db5836b6cf44e04","src/interest.rs":"adbaa1e0324c7bb32b023f105b45499390a1a83973d1a8c7d727a661a25cc259","src/lib.rs":"29ce35211c9d94d561d62d7e8ef57fc56cc90a9ba42b88b54c2f4c9236a8cd4d","src/relevancy.udl":"b551e7476f30dccdc74cbf2f38fc3b87a3a7d0ec5dfa6c2ea4417b18fbc7475c","src/rs.rs":"b98091d0adca809d8fef38eb5394f885e04d4d382b7c8abd7bd0fe53f64e7bd6","src/schema.rs":"f782c712f10c4f1af2f9e1424d6b52f59a2bacfcc452a8feb763f36478f5dd5d","src/url_hash.rs":"2e908316fb70923644d1990dbf470d69ce2f5e99b0c5c3d95ec691590be8ffa5","test-data":"1ef2cd092d59e7e126cd4a514af983d449ed9f9c98708702fd237464a76c2b5e"},"package":null}

View file

@ -25,9 +25,12 @@ license = "MPL-2.0"
name = "generate-test-data" name = "generate-test-data"
[dependencies] [dependencies]
anyhow = "1.0"
base64 = "0.21.2"
log = "0.4" log = "0.4"
md-5 = "0.10" md-5 = "0.10"
parking_lot = ">=0.11,<=0.12" parking_lot = ">=0.11,<=0.12"
serde_json = "1"
thiserror = "1.0" thiserror = "1.0"
uniffi = "0.27.1" uniffi = "0.27.1"
url = "2.5" url = "2.5"
@ -38,10 +41,17 @@ path = "../support/error"
[dependencies.interrupt-support] [dependencies.interrupt-support]
path = "../support/interrupt" path = "../support/interrupt"
[dependencies.remote_settings]
path = "../remote_settings"
[dependencies.rusqlite] [dependencies.rusqlite]
version = "0.30.0" version = "0.30.0"
features = ["bundled"] features = ["bundled"]
[dependencies.serde]
version = "1"
features = ["derive"]
[dependencies.sql-support] [dependencies.sql-support]
path = "../support/sql" path = "../support/sql"

View file

@ -20,7 +20,7 @@ pub struct RelevancyDb {
} }
impl RelevancyDb { impl RelevancyDb {
pub fn new(path: impl AsRef<Path>) -> Result<Self> { pub fn new(path: impl AsRef<Path>) -> Self {
// Note: use `SQLITE_OPEN_READ_WRITE` for both read and write connections. // Note: use `SQLITE_OPEN_READ_WRITE` for both read and write connections.
// Even if we're opening a read connection, we may need to do a write as part of the // Even if we're opening a read connection, we may need to do a write as part of the
// initialization process. // initialization process.
@ -31,10 +31,10 @@ impl RelevancyDb {
| OpenFlags::SQLITE_OPEN_NO_MUTEX | OpenFlags::SQLITE_OPEN_NO_MUTEX
| OpenFlags::SQLITE_OPEN_CREATE | OpenFlags::SQLITE_OPEN_CREATE
| OpenFlags::SQLITE_OPEN_READ_WRITE; | OpenFlags::SQLITE_OPEN_READ_WRITE;
Ok(Self { Self {
reader: LazyDb::new(path.as_ref(), db_open_flags, RelevancyConnectionInitializer), reader: LazyDb::new(path.as_ref(), db_open_flags, RelevancyConnectionInitializer),
writer: LazyDb::new(path.as_ref(), db_open_flags, RelevancyConnectionInitializer), writer: LazyDb::new(path.as_ref(), db_open_flags, RelevancyConnectionInitializer),
}) }
} }
pub fn close(&self) { pub fn close(&self) {
@ -52,7 +52,7 @@ impl RelevancyDb {
use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::atomic::{AtomicU32, Ordering};
static COUNTER: AtomicU32 = AtomicU32::new(0); static COUNTER: AtomicU32 = AtomicU32::new(0);
let count = COUNTER.fetch_add(1, Ordering::Relaxed); let count = COUNTER.fetch_add(1, Ordering::Relaxed);
Self::new(format!("file:test{count}.sqlite?mode=memory&cache=shared")).unwrap() Self::new(format!("file:test{count}.sqlite?mode=memory&cache=shared"))
} }
/// Accesses the Relevancy database in a transaction for reading. /// Accesses the Relevancy database in a transaction for reading.
@ -118,7 +118,7 @@ impl<'a> RelevancyDao<'a> {
", ",
)?; )?;
let interests = stmt.query_and_then((hash,), |row| -> Result<Interest> { let interests = stmt.query_and_then((hash,), |row| -> Result<Interest> {
Ok(row.get::<_, u32>(0)?.into()) row.get::<_, u32>(0)?.try_into()
})?; })?;
let mut interest_vec = InterestVector::default(); let mut interest_vec = InterestVector::default();

View file

@ -26,6 +26,18 @@ pub enum Error {
#[error("Interrupted")] #[error("Interrupted")]
Interrupted(#[from] interrupt_support::Interrupted), Interrupted(#[from] interrupt_support::Interrupted),
#[error("Invalid interest code: {0}")]
InvalidInterestCode(u32),
#[error("Remote Setting Error: {0}")]
RemoteSettingsError(#[from] remote_settings::RemoteSettingsError),
#[error("Serde Json Error: {0}")]
SerdeJsonError(#[from] serde_json::Error),
#[error("Base64 Decode Error: {0}")]
Base64DecodeError(String),
} }
/// Result enum for the public API /// Result enum for the public API

394
third_party/rust/relevancy/src/ingest.rs vendored Normal file
View file

@ -0,0 +1,394 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use crate::db::RelevancyDao;
use crate::rs::{
RelevancyAttachmentData, RelevancyRecord, RelevancyRemoteSettingsClient,
REMOTE_SETTINGS_COLLECTION,
};
use crate::url_hash::UrlHash;
use crate::{Error, Interest, RelevancyDb, Result};
use base64::{engine::general_purpose::STANDARD, Engine};
use remote_settings::{Client, RemoteSettingsConfig, RemoteSettingsRecord, RemoteSettingsServer};
// Number of rows `insert_interest_data` writes between two consecutive
// interruption checks when inserting interest data.
const WRITE_CHUNK_SIZE: usize = 100;
pub fn ensure_interest_data_populated(db: &RelevancyDb) -> Result<()> {
if !db.read(|dao| dao.need_to_load_url_interests())? {
return Ok(());
}
match fetch_interest_data() {
Ok(data) => {
db.read_write(move |dao| insert_interest_data(data, dao))?;
}
Err(e) => {
log::warn!("error fetching interest data: {e}");
return Err(Error::FetchInterestDataError);
}
}
Ok(())
}
/// Builds a Remote Settings client for the `REMOTE_SETTINGS_COLLECTION`
/// collection on the production server and fetches the interest data with it.
fn fetch_interest_data() -> Result<Vec<(Interest, UrlHash)>> {
    let config = RemoteSettingsConfig {
        collection_name: REMOTE_SETTINGS_COLLECTION.to_string(),
        server: Some(RemoteSettingsServer::Prod),
        server_url: None,
        bucket_name: None,
    };
    let client = Client::new(config)?;
    fetch_interest_data_inner(client)
}
/// Fetch the interest data
fn fetch_interest_data_inner(
rs: impl RelevancyRemoteSettingsClient,
) -> Result<Vec<(Interest, UrlHash)>> {
let remote_settings_response = rs.get_records()?;
let mut result = vec![];
for record in remote_settings_response.records {
let attachment_data = match &record.attachment {
None => return Err(Error::FetchInterestDataError),
Some(a) => rs.get_attachment(&a.location)?,
};
let interest = get_interest(&record)?;
let urls = get_hash_urls(attachment_data)?;
result.extend(std::iter::repeat(interest).zip(urls));
}
Ok(result)
}
fn get_hash_urls(attachment_data: Vec<u8>) -> Result<Vec<UrlHash>> {
let mut hash_urls = vec![];
let parsed_attachment_data =
serde_json::from_slice::<Vec<RelevancyAttachmentData>>(&attachment_data)?;
for attachment_data in parsed_attachment_data {
let hash_url = STANDARD
.decode(attachment_data.domain)
.map_err(|_| Error::Base64DecodeError("Invalid base64 error".to_string()))?;
let url_hash = hash_url.try_into().map_err(|_| {
Error::Base64DecodeError("Base64 string has wrong number of bytes".to_string())
})?;
hash_urls.push(url_hash);
}
Ok(hash_urls)
}
/// Reads the interest category out of a Remote Settings record.
///
/// The record's fields are deserialized into a [`RelevancyRecord`], and the
/// `category_code` found under `record_custom_details.category_to_domains`
/// is converted into an [`Interest`]. Deserialization and conversion errors
/// are returned to the caller.
fn get_interest(record: &RemoteSettingsRecord) -> Result<Interest> {
    // `from_value` takes ownership of its input, hence the clone of the fields.
    let fields = serde_json::Value::Object(record.fields.clone());
    let parsed: RelevancyRecord = serde_json::from_value(fields)?;
    let code = parsed
        .record_custom_details
        .category_to_domains
        .category_code;
    // NOTE(review): `as u32` converts without a range check. If `category_code`
    // is declared as a signed or wider integer, out-of-range values are
    // reinterpreted rather than rejected here -- confirm against its declaration.
    Interest::try_from(code as u32)
}
/// Writes the `(interest, url hash)` pairs into the database.
///
/// An interruption check runs before the first row and then again every
/// `WRITE_CHUNK_SIZE` rows. The first interruption or insertion error stops
/// the loop and is returned.
fn insert_interest_data(data: Vec<(Interest, UrlHash)>, dao: &mut RelevancyDao) -> Result<()> {
    for (row, (interest, hash_url)) in data.iter().enumerate() {
        // Rows 0, WRITE_CHUNK_SIZE, 2 * WRITE_CHUNK_SIZE, ... start a new chunk.
        if row % WRITE_CHUNK_SIZE == 0 {
            dao.err_if_interrupted()?;
        }
        dao.add_url_interest(*hash_url, *interest)?;
    }
    Ok(())
}
#[cfg(test)]
mod test {
    use std::{cell::RefCell, collections::HashMap};

    use anyhow::Context;
    use remote_settings::RemoteSettingsResponse;
    use serde_json::json;

    use super::*;
    use crate::{rs::RelevancyRemoteSettingsClient, url_hash::hash_url, InterestVector};

    /// A snapshot containing fake Remote Settings records and attachments for
    /// the store to ingest. We use snapshots to test the store's behavior in a
    /// data-driven way.
    struct Snapshot {
        records: Vec<RemoteSettingsRecord>,
        attachments: HashMap<&'static str, Vec<u8>>,
    }

    impl Snapshot {
        /// Creates a snapshot from a JSON value that represents a collection of
        /// Relevancy Remote Settings records.
        ///
        /// You can use the [`serde_json::json!`] macro to construct the JSON
        /// value, then pass it to this function. It's easier to use the
        /// `Snapshot::with_records(json!(...))` idiom than to construct the
        /// records by hand.
        fn with_records(value: serde_json::Value) -> anyhow::Result<Self> {
            Ok(Self {
                records: serde_json::from_value(value)
                    .context("Couldn't create snapshot with Remote Settings records")?,
                attachments: HashMap::new(),
            })
        }

        /// Adds a data attachment to the snapshot.
        ///
        /// `location` is the key the fake client looks attachments up by, and
        /// `value` is serialized to JSON bytes before being stored.
        fn with_data(
            mut self,
            location: &'static str,
            value: serde_json::Value,
        ) -> anyhow::Result<Self> {
            self.attachments.insert(
                location,
                serde_json::to_vec(&value).context("Couldn't add data attachment to snapshot")?,
            );
            Ok(self)
        }
    }

    /// A fake Remote Settings client that returns records and attachments from
    /// a snapshot.
    struct SnapshotSettingsClient {
        /// The current snapshot. You can modify it using
        /// [`RefCell::borrow_mut()`] to simulate remote updates in tests.
        snapshot: RefCell<Snapshot>,
    }

    impl SnapshotSettingsClient {
        /// Creates a client with an initial snapshot.
        fn with_snapshot(snapshot: Snapshot) -> Self {
            Self {
                snapshot: RefCell::new(snapshot),
            }
        }
    }

    impl RelevancyRemoteSettingsClient for SnapshotSettingsClient {
        /// Returns a copy of the snapshot's records. `last_modified` is the
        /// largest value found among the records, or 0 when there are none.
        fn get_records(&self) -> Result<RemoteSettingsResponse> {
            let records = self.snapshot.borrow().records.clone();
            let last_modified = records
                .iter()
                .map(|record: &RemoteSettingsRecord| record.last_modified)
                .max()
                .unwrap_or(0);
            Ok(RemoteSettingsResponse {
                records,
                last_modified,
            })
        }

        /// Returns a copy of the attachment stored under `location`.
        ///
        /// Panics when the snapshot has no such attachment: a test asking for
        /// an attachment it never registered is a bug in the test itself.
        fn get_attachment(&self, location: &str) -> Result<Vec<u8>> {
            Ok(self
                .snapshot
                .borrow()
                .attachments
                .get(location)
                .unwrap_or_else(|| unreachable!("Unexpected request for attachment `{}`", location))
                .clone())
        }
    }

    #[test]
    fn test_interest_vectors() {
        let db = RelevancyDb::new_for_test();
        db.read_write(|dao| {
            // Test that the interest data matches the values we started from in
            // `bin/generate-test-data.rs`
            dao.add_url_interest(hash_url("https://espn.com").unwrap(), Interest::Sports)?;
            dao.add_url_interest(hash_url("https://dogs.com").unwrap(), Interest::Animals)?;
            dao.add_url_interest(hash_url("https://cars.com").unwrap(), Interest::Autos)?;
            dao.add_url_interest(
                hash_url("https://www.vouge.com").unwrap(),
                Interest::Fashion,
            )?;
            dao.add_url_interest(hash_url("https://slashdot.org").unwrap(), Interest::Tech)?;
            dao.add_url_interest(hash_url("https://www.nascar.com").unwrap(), Interest::Autos)?;
            dao.add_url_interest(
                hash_url("https://www.nascar.com").unwrap(),
                Interest::Sports,
            )?;
            dao.add_url_interest(
                hash_url("https://unknown.url").unwrap(),
                Interest::Inconclusive,
            )?;

            assert_eq!(
                dao.get_url_interest_vector("https://espn.com/").unwrap(),
                InterestVector {
                    sports: 1,
                    ..InterestVector::default()
                }
            );
            assert_eq!(
                dao.get_url_interest_vector("https://dogs.com/").unwrap(),
                InterestVector {
                    animals: 1,
                    ..InterestVector::default()
                }
            );
            assert_eq!(
                dao.get_url_interest_vector("https://cars.com/").unwrap(),
                InterestVector {
                    autos: 1,
                    ..InterestVector::default()
                }
            );
            assert_eq!(
                dao.get_url_interest_vector("https://www.vouge.com/")
                    .unwrap(),
                InterestVector {
                    fashion: 1,
                    ..InterestVector::default()
                }
            );
            assert_eq!(
                dao.get_url_interest_vector("https://slashdot.org/")
                    .unwrap(),
                InterestVector {
                    tech: 1,
                    ..InterestVector::default()
                }
            );
            // A URL registered under two interests counts towards both.
            assert_eq!(
                dao.get_url_interest_vector("https://www.nascar.com/")
                    .unwrap(),
                InterestVector {
                    autos: 1,
                    sports: 1,
                    ..InterestVector::default()
                }
            );
            assert_eq!(
                dao.get_url_interest_vector("https://unknown.url/").unwrap(),
                InterestVector {
                    inconclusive: 1,
                    ..InterestVector::default()
                }
            );
            Ok(())
        })
        .unwrap();
    }

    #[test]
    fn test_variations_on_the_url() {
        let db = RelevancyDb::new_for_test();
        db.read_write(|dao| {
            dao.add_url_interest(hash_url("https://espn.com").unwrap(), Interest::Sports)?;
            dao.add_url_interest(hash_url("https://nascar.com").unwrap(), Interest::Autos)?;
            dao.add_url_interest(hash_url("https://nascar.com").unwrap(), Interest::Sports)?;

            // Different paths/queries should work
            assert_eq!(
                dao.get_url_interest_vector("https://espn.com/foo/bar/?baz")
                    .unwrap(),
                InterestVector {
                    sports: 1,
                    ..InterestVector::default()
                }
            );
            // Different schemes should too
            assert_eq!(
                dao.get_url_interest_vector("http://espn.com/").unwrap(),
                InterestVector {
                    sports: 1,
                    ..InterestVector::default()
                }
            );
            // But changes to the domain shouldn't
            assert_eq!(
                dao.get_url_interest_vector("http://espn2.com/").unwrap(),
                InterestVector::default()
            );
            // However, extra components past the 2nd one in the domain are ignored
            assert_eq!(
                dao.get_url_interest_vector("https://www.nascar.com/")
                    .unwrap(),
                InterestVector {
                    autos: 1,
                    sports: 1,
                    ..InterestVector::default()
                }
            );
            Ok(())
        })
        .unwrap();
    }

    #[test]
    fn test_parse_records() -> anyhow::Result<()> {
        let snapshot = Snapshot::with_records(json!([{
            "id": "animals-0001",
            "last_modified": 15,
            "type": "category_to_domains",
            "attachment": {
                "filename": "data-1.json",
                "mimetype": "application/json",
                "location": "data-1.json",
                "hash": "",
                "size": 0
            },
            "record_custom_details": {
                "category_to_domains": {
                    "category": "animals",
                    "category_code": 1,
                    "version": 1
                }
            }
        }]))?
        .with_data(
            "data-1.json",
            json!([
                {"domain": "J2jtyjQtYQ/+/p//xhz43Q=="},
                {"domain": "Zd4awCwGZLkat59nIWje3g=="}]),
        )?;
        let rs_client = SnapshotSettingsClient::with_snapshot(snapshot);
        // The two base64 strings above must decode to the hashes of these URLs.
        assert_eq!(
            fetch_interest_data_inner(rs_client).unwrap(),
            vec![
                (Interest::Animals, hash_url("https://dogs.com").unwrap()),
                (Interest::Animals, hash_url("https://cats.com").unwrap())
            ]
        );
        Ok(())
    }

    #[test]
    fn test_parse_records_with_bad_domain_strings() -> anyhow::Result<()> {
        let snapshot = Snapshot::with_records(json!([{
            "id": "animals-0001",
            "last_modified": 15,
            "type": "category_to_domains",
            "attachment": {
                "filename": "data-1.json",
                "mimetype": "application/json",
                "location": "data-1.json",
                "hash": "",
                "size": 0
            },
            "record_custom_details": {
                "category_to_domains": {
                    "category": "animals",
                    "category_code": 1,
                    "version": 1
                }
            }
        }]))?
        .with_data(
            "data-1.json",
            json!([
                {"domain": "badString"},
                {"domain": "notBase64"}]),
        )?;
        let rs_client = SnapshotSettingsClient::with_snapshot(snapshot);
        // `expect_err` would accept any failure; pin the error variant so the
        // test cannot pass because of an unrelated error.
        let result = fetch_interest_data_inner(rs_client);
        assert!(
            matches!(result, Err(Error::Base64DecodeError(_))),
            "expected a Base64DecodeError, got {result:?}"
        );
        Ok(())
    }
}

View file

@ -2,33 +2,37 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use crate::Error;
/// List of possible interests for a domain. Domains can be associated with one or multiple /// List of possible interests for a domain. Domains can be associated with one or multiple
/// interests. `Inconclusive` is used for domains in the user's top sites that we can't classify /// interests. `Inconclusive` is used for domains in the user's top sites that we can't classify
/// because there's no corresponding entry in the interest database. /// because there's no corresponding entry in the interest database.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[repr(u32)] #[repr(u32)]
pub enum Interest { pub enum Interest {
Inconclusive, // Note: if you change these codes, make sure to update the `TryFrom<u32>` implementation and
Animals, // the `test_interest_code_conversion` test.
Arts, Inconclusive = 0,
Autos, Animals = 1,
Business, Arts = 2,
Career, Autos = 3,
Education, Business = 4,
Fashion, Career = 5,
Finance, Education = 6,
Food, Fashion = 7,
Government, Finance = 8,
Food = 9,
Government = 10,
//Disable this per policy consultation //Disable this per policy consultation
// Health, // Health = 11,
Hobbies, Hobbies = 12,
Home, Home = 13,
News, News = 14,
RealEstate, RealEstate = 15,
Society, Society = 16,
Sports, Sports = 17,
Tech, Tech = 18,
Travel, Travel = 19,
} }
impl From<Interest> for u32 { impl From<Interest> for u32 {
@ -43,14 +47,35 @@ impl From<Interest> for usize {
} }
} }
impl From<u32> for Interest { impl TryFrom<u32> for Interest {
fn from(code: u32) -> Self { // On error, return the invalid code back
if code as usize > Self::COUNT { type Error = Error;
panic!("Invalid interest code: {code}")
fn try_from(code: u32) -> Result<Self, Self::Error> {
match code {
0 => Ok(Self::Inconclusive),
1 => Ok(Self::Animals),
2 => Ok(Self::Arts),
3 => Ok(Self::Autos),
4 => Ok(Self::Business),
5 => Ok(Self::Career),
6 => Ok(Self::Education),
7 => Ok(Self::Fashion),
8 => Ok(Self::Finance),
9 => Ok(Self::Food),
10 => Ok(Self::Government),
//Disable this per policy consultation
// 11 => Ok(Self::Health),
12 => Ok(Self::Hobbies),
13 => Ok(Self::Home),
14 => Ok(Self::News),
15 => Ok(Self::RealEstate),
16 => Ok(Self::Society),
17 => Ok(Self::Sports),
18 => Ok(Self::Tech),
19 => Ok(Self::Travel),
n => Err(Error::InvalidInterestCode(n)),
} }
// Safety: This is safe since Interest has a u32 representation and we've done a bounds
// check
unsafe { std::mem::transmute(code) }
} }
} }
@ -111,6 +136,34 @@ pub struct InterestVector {
pub travel: u32, pub travel: u32,
} }
impl std::ops::Add for InterestVector {
    type Output = Self;

    /// Sums two interest vectors field by field and returns the result.
    fn add(mut self, other: Self) -> Self {
        // Accumulate into the left-hand operand, which is owned by this call.
        self.inconclusive += other.inconclusive;
        self.animals += other.animals;
        self.arts += other.arts;
        self.autos += other.autos;
        self.business += other.business;
        self.career += other.career;
        self.education += other.education;
        self.fashion += other.fashion;
        self.finance += other.finance;
        self.food += other.food;
        self.government += other.government;
        self.hobbies += other.hobbies;
        self.home += other.home;
        self.news += other.news;
        self.real_estate += other.real_estate;
        self.society += other.society;
        self.sports += other.sports;
        self.tech += other.tech;
        self.travel += other.travel;
        self
    }
}
impl std::ops::Index<Interest> for InterestVector { impl std::ops::Index<Interest> for InterestVector {
type Output = u32; type Output = u32;
@ -166,3 +219,29 @@ impl std::ops::IndexMut<Interest> for InterestVector {
} }
} }
} }
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_interest_code_conversion() {
        // Every interest must survive a round trip through its numeric code.
        for interest in Interest::all() {
            let code = u32::from(interest);
            assert_eq!(Interest::try_from(code).unwrap(), interest);
        }

        // Codes that must be rejected with `InvalidInterestCode` carrying the
        // offending value: 20 and 100 are out of bounds, and 11 is Health,
        // which is currently disabled.
        for invalid in [20u32, 100, 11] {
            assert!(matches!(
                Interest::try_from(invalid),
                Err(Error::InvalidInterestCode(n)) if n == invalid
            ));
        }
    }
}

View file

@ -11,8 +11,9 @@
mod db; mod db;
mod error; mod error;
mod ingest;
mod interest; mod interest;
mod populate_interests; mod rs;
mod schema; mod schema;
pub mod url_hash; pub mod url_hash;
@ -28,11 +29,10 @@ pub struct RelevancyStore {
/// Top-level API for the Relevancy component /// Top-level API for the Relevancy component
impl RelevancyStore { impl RelevancyStore {
#[handle_error(Error)] pub fn new(db_path: String) -> Self {
pub fn new(db_path: String) -> ApiResult<Self> { Self {
Ok(Self { db: RelevancyDb::new(db_path),
db: RelevancyDb::new(db_path)?, }
})
} }
pub fn close(&self) { pub fn close(&self) {
@ -55,9 +55,21 @@ impl RelevancyStore {
/// ///
/// This method may execute for a long time and should only be called from a worker thread. /// This method may execute for a long time and should only be called from a worker thread.
#[handle_error(Error)] #[handle_error(Error)]
pub fn ingest(&self, _top_urls_by_frecency: Vec<String>) -> ApiResult<()> { pub fn ingest(&self, top_urls_by_frecency: Vec<String>) -> ApiResult<InterestVector> {
populate_interests::ensure_interest_data_populated(&self.db)?; ingest::ensure_interest_data_populated(&self.db)?;
todo!() self.classify(top_urls_by_frecency)
}
pub fn classify(&self, top_urls_by_frecency: Vec<String>) -> Result<InterestVector> {
// For experimentation purposes we are going to return an interest vector.
// Eventually we would want to store this data in the DB and incrementally update it.
let mut interest_vector = InterestVector::default();
for url in top_urls_by_frecency {
let interest_count = self.db.read(|dao| dao.get_url_interest_vector(&url))?;
interest_vector = interest_vector + interest_count;
}
Ok(interest_vector)
} }
/// Calculate metrics for the validation phase /// Calculate metrics for the validation phase
@ -87,3 +99,45 @@ pub struct InterestMetrics {
} }
uniffi::include_scaffolding!("relevancy"); uniffi::include_scaffolding!("relevancy");
#[cfg(test)]
mod test {
    use super::*;
    use crate::url_hash::hash_url;

    /// Seeds an in-memory store with a handful of URL classifications, then checks that
    /// `ingest()` returns the per-interest totals for the supplied URLs.
    #[test]
    fn test_ingest() {
        let store =
            RelevancyStore::new("file:test_store_data?mode=memory&cache=shared".to_owned());

        // Known classifications, inserted in this order.
        let seed = [
            ("https://food.com", Interest::Food),
            ("https://hello.com", Interest::Inconclusive),
            ("https://pasta.com", Interest::Food),
            ("https://dog.com", Interest::Animals),
        ];
        store
            .db
            .read_write(|dao| {
                for (url, interest) in seed {
                    dao.add_url_interest(hash_url(url).unwrap(), interest)?;
                }
                Ok(())
            })
            .expect("Insert should succeed");

        let top_urls: Vec<String> = [
            "https://food.com/",
            "https://hello.com",
            "https://pasta.com",
            "https://dog.com",
        ]
        .iter()
        .map(|url| url.to_string())
        .collect();

        // Two food URLs, one animal URL and one inconclusive URL were seeded above.
        let expected = InterestVector {
            inconclusive: 1,
            animals: 1,
            food: 2,
            ..InterestVector::default()
        };
        assert_eq!(store.ingest(top_urls).unwrap(), expected);
    }
}

View file

@ -1,164 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use crate::{url_hash::UrlHash, Error, Interest, RelevancyDb, Result};
use std::io::{Cursor, Read};
// Number of rows to write when inserting interest data before checking for interruption
const WRITE_CHUNK_SIZE: usize = 100;
pub fn ensure_interest_data_populated(db: &RelevancyDb) -> Result<()> {
if !db.read(|dao| dao.need_to_load_url_interests())? {
return Ok(());
}
let interest_data = match fetch_interest_data() {
Ok(data) => data,
Err(e) => {
log::warn!("error fetching interest data: {e}");
return Err(Error::FetchInterestDataError);
}
};
db.read_write(move |dao| {
for chunk in interest_data.chunks(WRITE_CHUNK_SIZE) {
for (url_hash, interest) in chunk {
dao.add_url_interest(*url_hash, *interest)?;
}
dao.err_if_interrupted()?;
}
Ok(())
})
}
/// Fetch the interest data.
///
/// The embedded blob is read as a sequence of sections, one per interest in the order yielded
/// by `Interest::all()`. Each section is a little-endian `u32` entry count followed by that
/// many 16-byte URL hashes. An I/O error is returned if the blob ends before a read completes.
fn fetch_interest_data() -> std::io::Result<Vec<(UrlHash, Interest)>> {
    /// Reads exactly `N` bytes from `reader` into a fixed-size array.
    fn read_array<const N: usize>(reader: &mut impl Read) -> std::io::Result<[u8; N]> {
        let mut buf = [0u8; N];
        reader.read_exact(&mut buf)?;
        Ok(buf)
    }

    // TODO: this hack should be replaced with something that fetches from remote settings.
    // It should ideally check for interruption while fetching the data.
    let bytes = include_bytes!("../test-data");
    let mut reader = Cursor::new(&bytes);
    let mut pairs = vec![];

    // Walk the sections in interest order.
    for interest in Interest::all() {
        let entry_count = u32::from_le_bytes(read_array(&mut reader)?);
        for _ in 0..entry_count {
            let url_hash: UrlHash = read_array(&mut reader)?;
            pairs.push((url_hash, interest));
        }
    }
    Ok(pairs)
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::InterestVector;

    /// Populates a fresh test database, then asserts that every URL in `cases` resolves to
    /// the interest vector paired with it.
    fn assert_url_interest_vectors(cases: &[(&str, InterestVector)]) {
        let db = RelevancyDb::new_for_test();
        ensure_interest_data_populated(&db).unwrap();
        db.read(|dao| {
            for (url, expected) in cases {
                assert_eq!(dao.get_url_interest_vector(*url).unwrap(), *expected);
            }
            Ok(())
        })
        .unwrap();
    }

    #[test]
    fn test_interest_vectors() {
        // The interest data should match the values we started from in
        // `bin/generate-test-data.rs`
        assert_url_interest_vectors(&[
            (
                "https://espn.com/",
                InterestVector {
                    sports: 1,
                    ..InterestVector::default()
                },
            ),
            (
                "https://dogs.com/",
                InterestVector {
                    animals: 1,
                    ..InterestVector::default()
                },
            ),
            (
                "https://cars.com/",
                InterestVector {
                    autos: 1,
                    ..InterestVector::default()
                },
            ),
            (
                "https://www.vouge.com/",
                InterestVector {
                    fashion: 1,
                    ..InterestVector::default()
                },
            ),
            (
                "https://slashdot.org/",
                InterestVector {
                    tech: 1,
                    ..InterestVector::default()
                },
            ),
            (
                "https://www.nascar.com/",
                InterestVector {
                    autos: 1,
                    sports: 1,
                    ..InterestVector::default()
                },
            ),
            ("https://unknown.url/", InterestVector::default()),
        ]);
    }

    #[test]
    fn test_variations_on_the_url() {
        assert_url_interest_vectors(&[
            // Different paths/queries should work
            (
                "https://espn.com/foo/bar/?baz",
                InterestVector {
                    sports: 1,
                    ..InterestVector::default()
                },
            ),
            // Different schemes should too
            (
                "http://espn.com/",
                InterestVector {
                    sports: 1,
                    ..InterestVector::default()
                },
            ),
            // But changes to the domain shouldn't
            ("http://www.espn.com/", InterestVector::default()),
            // However, extra components past the 3rd one in the domain are ignored
            (
                "https://foo.www.nascar.com/",
                InterestVector {
                    autos: 1,
                    sports: 1,
                    ..InterestVector::default()
                },
            ),
        ]);
    }
}

View file

@ -10,7 +10,6 @@ interface RelevancyStore {
// Construct a new RelevancyStore // Construct a new RelevancyStore
// //
// This is non-blocking since databases and other resources are lazily opened. // This is non-blocking since databases and other resources are lazily opened.
[Throws=RelevancyApiError]
constructor(string dbpath); constructor(string dbpath);
// Close any open resources (for example databases) // Close any open resources (for example databases)
@ -23,7 +22,7 @@ interface RelevancyStore {
// Ingest the top URLs by frequency to build up the user's interest vector // Ingest the top URLs by frequency to build up the user's interest vector
[Throws=RelevancyApiError] [Throws=RelevancyApiError]
void ingest(sequence<string> top_urls); InterestVector ingest(sequence<string> top_urls);
// Calculate metrics for the user's interest vector in order to measure how strongly we're // Calculate metrics for the user's interest vector in order to measure how strongly we're
// identifying interests. See the `InterestMetrics` struct for details. // identifying interests. See the `InterestMetrics` struct for details.

60
third_party/rust/relevancy/src/rs.rs vendored Normal file
View file

@ -0,0 +1,60 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
use crate::Result;
use remote_settings::RemoteSettingsResponse;
use serde::Deserialize;
/// The Remote Settings collection name.
pub(crate) const REMOTE_SETTINGS_COLLECTION: &str = "content-relevance";
/// A trait for a client that downloads records from Remote Settings.
///
/// This trait lets tests use a mock client.
pub(crate) trait RelevancyRemoteSettingsClient {
/// Fetches records from the Relevancy Remote Settings collection.
///
/// Returns the Remote Settings response on success, or this crate's error on failure.
fn get_records(&self) -> Result<RemoteSettingsResponse>;
/// Fetches a record's attachment from the Relevancy Remote Settings
/// collection.
///
/// Returns the raw attachment bytes on success.
// NOTE(review): `location` is presumably the attachment location taken from a record's
// metadata -- confirm against the callers.
fn get_attachment(&self, location: &str) -> Result<Vec<u8>>;
}
/// Production implementation: each method forwards to the inherent `remote_settings::Client`
/// method of the same name and converts its error into this crate's error type.
impl RelevancyRemoteSettingsClient for remote_settings::Client {
    fn get_records(&self) -> Result<RemoteSettingsResponse> {
        remote_settings::Client::get_records(self).map_err(Into::into)
    }

    fn get_attachment(&self, location: &str) -> Result<Vec<u8>> {
        remote_settings::Client::get_attachment(self, location).map_err(Into::into)
    }
}
/// A record in the Relevancy Remote Settings collection.
#[derive(Clone, Debug, Deserialize)]
pub struct RelevancyRecord {
/// The record's type tag, deserialized from the `type` key (`type` is a Rust keyword, hence
/// the rename).
#[serde(rename = "type")]
pub record_type: String,
/// Category details carried by the record.
pub record_custom_details: RecordCustomDetails,
}
/// Custom details related to the category of the record.
#[derive(Clone, Debug, Deserialize)]
pub struct RecordCustomDetails {
/// The category information for this record.
pub category_to_domains: CategoryToDomains,
}
/// Category information related to the record.
#[derive(Clone, Debug, Deserialize)]
pub struct CategoryToDomains {
/// Version number of the category data.
// NOTE(review): how the version is interpreted is not visible here -- confirm.
pub version: i32,
/// Category name.
pub category: String,
/// Numeric code of the category.
// NOTE(review): presumably matches the numeric `Interest` codes -- confirm against ingest.
pub category_code: i32,
}
/// A downloaded Remote Settings attachment that contains domain data.
#[derive(Clone, Debug, Deserialize)]
pub struct RelevancyAttachmentData {
/// The domain entry.
// NOTE(review): whether this is a plain domain or an encoded/hashed form is not visible
// here -- confirm against the ingest code.
pub domain: String,
}

View file

@ -8,11 +8,10 @@ use url::{Host, Url};
pub type UrlHash = [u8; 16]; pub type UrlHash = [u8; 16];
/// Given a URL, extract the part of it that we want to use to identify it. /// Given a URL, extract the part of it that we want to use to identify it.
///
/// We currently use the final 3 components of the URL domain.
///
/// TODO: decide if this should be 3 or 3 components.
pub fn url_hash_source(url: &str) -> Option<String> { pub fn url_hash_source(url: &str) -> Option<String> {
// We currently use the final 2 components of the URL domain.
const URL_COMPONENTS_TO_USE: usize = 2;
let url = Url::parse(url).ok()?; let url = Url::parse(url).ok()?;
let domain = match url.host() { let domain = match url.host() {
Some(Host::Domain(d)) => d, Some(Host::Domain(d)) => d,
@ -20,7 +19,7 @@ pub fn url_hash_source(url: &str) -> Option<String> {
}; };
// This will store indexes of `.` chars as we search backwards. // This will store indexes of `.` chars as we search backwards.
let mut pos = domain.len(); let mut pos = domain.len();
for _ in 0..3 { for _ in 0..URL_COMPONENTS_TO_USE {
match domain[0..pos].rfind('.') { match domain[0..pos].rfind('.') {
Some(p) => pos = p, Some(p) => pos = p,
// The domain has less than 3 dots, return it all // The domain has less than 3 dots, return it all
@ -47,12 +46,12 @@ mod test {
fn test_url_hash_source() { fn test_url_hash_source() {
let table = [ let table = [
("http://example.com/some-path", Some("example.com")), ("http://example.com/some-path", Some("example.com")),
("http://foo.example.com/some-path", Some("foo.example.com")), ("http://foo.example.com/some-path", Some("example.com")),
( (
"http://foo.bar.baz.example.com/some-path", "http://foo.bar.baz.example.com/some-path",
Some("baz.example.com"), Some("example.com"),
), ),
("http://foo.com.uk/some-path", Some("foo.com.uk")), ("http://foo.com.uk/some-path", Some("com.uk")),
("http://amazon.com/some-path", Some("amazon.com")), ("http://amazon.com/some-path", Some("amazon.com")),
("http://192.168.0.1/some-path", None), ("http://192.168.0.1/some-path", None),
]; ];

View file

@ -1 +1 @@
{"files":{"Cargo.toml":"05e4d7f7b3649a3e3fa441c4af53a633d18f20bb04fd761ed33fc9d461fd0dee","README.md":"fb72d0028586cab1421b853ef529d7ce78ad7316818b7733a4f3488b0fba67f7","benches/benchmark_all.rs":"c2343c9197b6d9ccb0798d7701b1b0d2569d494dd31a975d21d7ec6f26e32879","build.rs":"78780c5cccfe22c3ff4198624b9e188559c437c3e6fa1c8bb66548eee6aa66bf","src/benchmarks/README.md":"ee6d50df2c31cfd80a5bc047011b518dcf57f1ef928a811bb770f1a09f41b3de","src/benchmarks/client.rs":"4b2125031d740ca1ab468e76bbea777ac0bc4cc221b03b7bc2da773bed61dac5","src/benchmarks/ingest.rs":"1ffdc403fb945ea0b58353df9773ba45ab0e9082d61dd5330ad49fad8cbb5d9f","src/benchmarks/mod.rs":"fe1898ba4d783213525da10d92858ee84cebfd22749bad7aeb461d338fe5504a","src/bin/debug_ingestion_sizes.rs":"ce6e810be7b3fc19e826d75b622b82cfab5a1a99397a6d0833c2c4eebff2d364","src/config.rs":"206ae9dc768c755649cb0c88a7b1fc3c926c715441784f61e9dc06a8a02fc568","src/db.rs":"734f5fd9f36f03c07a508a9a353872b81107f5fe09f27294ba27d7e1249e3988","src/error.rs":"f563210a6c050d98ec85e0f6d9401e7373bfb816e865e8edabbabb23d848ba13","src/keyword.rs":"988d0ab021c0df19cfd3c519df7d37f606bf984cd14d0efca4e5a7aff88344dd","src/lib.rs":"18f988eb49626c6e186c8bc65a51b4a40d796f36d3de8905506f76c6e5e876cd","src/pocket.rs":"1316668840ec9b4ea886223921dc9d3b5a1731d1a5206c0b1089f2a6c45c1b7b","src/provider.rs":"fe76f19a223f5cac056c7d48525087ca2c26bf0629b0e11b1f8dc98d165c8bb2","src/rs.rs":"e3eabde58c859ebe1154bf8da56ca134ace135934e3f280acc8186b4204399b3","src/schema.rs":"88ff3ae6b652fa5a5cff4dc504d11a7fc33f1b2ee9716b970f646d9f9ca90ab7","src/store.rs":"5873438bfc2d2a3e112935bb196bcd1f9b46351d1b341113115f45f7117fc3bf","src/suggest.udl":"b49043c5ec0210aeccf92eadbc1acdce697fc588a2500a281e083b3d8c42ff73","src/suggestion.rs":"f31227779d13d1b03a622e08a417ceba4afb161885a01c2bc87a6a652b5e8be5","src/yelp.rs":"9c0dc02a994cc05df524aa4ef337d10f575d1891259193b6419fed6fe279cb54","uniffi.toml":"f26317442ddb5b3281245bef6e60ffcb78bb95d29fe4a351a56dbb88d4ec8aab"},"package":null} 
{"files":{"Cargo.toml":"05e4d7f7b3649a3e3fa441c4af53a633d18f20bb04fd761ed33fc9d461fd0dee","README.md":"fb72d0028586cab1421b853ef529d7ce78ad7316818b7733a4f3488b0fba67f7","benches/benchmark_all.rs":"c2343c9197b6d9ccb0798d7701b1b0d2569d494dd31a975d21d7ec6f26e32879","build.rs":"78780c5cccfe22c3ff4198624b9e188559c437c3e6fa1c8bb66548eee6aa66bf","src/benchmarks/README.md":"ee6d50df2c31cfd80a5bc047011b518dcf57f1ef928a811bb770f1a09f41b3de","src/benchmarks/client.rs":"4b2125031d740ca1ab468e76bbea777ac0bc4cc221b03b7bc2da773bed61dac5","src/benchmarks/ingest.rs":"1ffdc403fb945ea0b58353df9773ba45ab0e9082d61dd5330ad49fad8cbb5d9f","src/benchmarks/mod.rs":"fe1898ba4d783213525da10d92858ee84cebfd22749bad7aeb461d338fe5504a","src/bin/debug_ingestion_sizes.rs":"ce6e810be7b3fc19e826d75b622b82cfab5a1a99397a6d0833c2c4eebff2d364","src/config.rs":"206ae9dc768c755649cb0c88a7b1fc3c926c715441784f61e9dc06a8a02fc568","src/db.rs":"a4e18b9f45e0473ea64b5ecdf6d1d67e0519f9629d495c157b0bd1b47c3e2f4f","src/error.rs":"f563210a6c050d98ec85e0f6d9401e7373bfb816e865e8edabbabb23d848ba13","src/keyword.rs":"988d0ab021c0df19cfd3c519df7d37f606bf984cd14d0efca4e5a7aff88344dd","src/lib.rs":"18f988eb49626c6e186c8bc65a51b4a40d796f36d3de8905506f76c6e5e876cd","src/pocket.rs":"1316668840ec9b4ea886223921dc9d3b5a1731d1a5206c0b1089f2a6c45c1b7b","src/provider.rs":"fe76f19a223f5cac056c7d48525087ca2c26bf0629b0e11b1f8dc98d165c8bb2","src/rs.rs":"e3eabde58c859ebe1154bf8da56ca134ace135934e3f280acc8186b4204399b3","src/schema.rs":"88ff3ae6b652fa5a5cff4dc504d11a7fc33f1b2ee9716b970f646d9f9ca90ab7","src/store.rs":"aad193774eecec739a7debd1c9e4fd46df384e7a524203e5e5f0354b93f73c1c","src/suggest.udl":"bfa653aa88c954860a9728a597daad8f4a7db8c81bc156725bf801f7cddf8459","src/suggestion.rs":"f31227779d13d1b03a622e08a417ceba4afb161885a01c2bc87a6a652b5e8be5","src/yelp.rs":"9c0dc02a994cc05df524aa4ef337d10f575d1891259193b6419fed6fe279cb54","uniffi.toml":"f26317442ddb5b3281245bef6e60ffcb78bb95d29fe4a351a56dbb88d4ec8aab"},"package":null}

View file

@ -188,6 +188,12 @@ impl<'a> SuggestDao<'a> {
// //
// These methods implement CRUD operations // These methods implement CRUD operations
/// Returns `true` when the `suggestions` table contains no rows.
///
/// Propagates any error from running the query.
pub fn suggestions_table_empty(&self) -> Result<bool> {
    let is_empty: bool = self
        .conn
        .query_one("SELECT NOT EXISTS (SELECT 1 FROM suggestions)")?;
    Ok(is_empty)
}
/// Fetches suggestions that match the given query from the database. /// Fetches suggestions that match the given query from the database.
pub fn fetch_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> { pub fn fetch_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
let unique_providers = query.providers.iter().collect::<HashSet<_>>(); let unique_providers = query.providers.iter().collect::<HashSet<_>>();

View file

@ -275,6 +275,8 @@ pub struct SuggestIngestionConstraints {
/// soft limit, and the store might ingest more than requested. /// soft limit, and the store might ingest more than requested.
pub max_suggestions: Option<u64>, pub max_suggestions: Option<u64>,
pub providers: Option<Vec<SuggestionProvider>>, pub providers: Option<Vec<SuggestionProvider>>,
/// Only run ingestion if the table `suggestions` is empty
pub empty_only: bool,
} }
/// The implementation of the store. This is generic over the Remote Settings /// The implementation of the store. This is generic over the Remote Settings
@ -357,6 +359,10 @@ where
pub fn ingest(&self, constraints: SuggestIngestionConstraints) -> Result<()> { pub fn ingest(&self, constraints: SuggestIngestionConstraints) -> Result<()> {
let writer = &self.dbs()?.writer; let writer = &self.dbs()?.writer;
if constraints.empty_only && !writer.read(|dao| dao.suggestions_table_empty())? {
return Ok(());
}
if let Some(unparsable_records) = if let Some(unparsable_records) =
writer.read(|dao| dao.get_meta::<UnparsableRecords>(UNPARSABLE_RECORDS_META_KEY))? writer.read(|dao| dao.get_meta::<UnparsableRecords>(UNPARSABLE_RECORDS_META_KEY))?
{ {
@ -888,6 +894,12 @@ mod tests {
let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
// suggestions_table_empty returns true before the ingestion is complete
assert!(store
.dbs()?
.reader
.read(|dao| dao.suggestions_table_empty())?);
store.ingest(SuggestIngestionConstraints::default())?; store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| { store.dbs()?.reader.read(|dao| {
@ -927,6 +939,153 @@ mod tests {
Ok(()) Ok(())
})?; })?;
// suggestions_table_empty returns false after the ingestion is complete
assert!(!store
.dbs()?
.reader
.read(|dao| dao.suggestions_table_empty())?);
Ok(())
}
/// Tests ingesting suggestions into an empty database.
#[test]
fn ingest_empty_only() -> anyhow::Result<()> {
before_each();
// This ingestion should run, since the DB is empty
let snapshot = Snapshot::with_records(json!([{
"id": "1234",
"type": "data",
"last_modified": 15,
"attachment": {
"filename": "data-1.json",
"mimetype": "application/json",
"location": "data-1.json",
"hash": "",
"size": 0,
},
}]))?
.with_data(
"data-1.json",
json!([{
"id": 0,
"advertiser": "Los Pollos Hermanos",
"iab_category": "8 - Food & Drink",
"keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"],
"title": "Los Pollos Hermanos - Albuquerque",
"url": "https://www.lph-nm.biz",
"icon": "5678",
"impression_url": "https://example.com/impression_url",
"click_url": "https://example.com/click_url",
"score": 0.3
}]),
)?;
let mut store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
store.ingest(SuggestIngestionConstraints {
empty_only: true,
..SuggestIngestionConstraints::default()
})?;
store.dbs()?.reader.read(|dao| {
expect![[r#"
[
Amp {
title: "Los Pollos Hermanos - Albuquerque",
url: "https://www.lph-nm.biz",
raw_url: "https://www.lph-nm.biz",
icon: None,
icon_mimetype: None,
full_keyword: "los",
block_id: 0,
advertiser: "Los Pollos Hermanos",
iab_category: "8 - Food & Drink",
impression_url: "https://example.com/impression_url",
click_url: "https://example.com/click_url",
raw_click_url: "https://example.com/click_url",
score: 0.3,
},
]
"#]]
.assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
keyword: "lo".into(),
providers: vec![SuggestionProvider::Amp],
limit: None,
})?);
Ok(())
})?;
// ingestion should not run with SuggestIngestionConstraints::empty_only = true, since the DB
// is no longer empty
store.settings_client = SnapshotSettingsClient::with_snapshot(Snapshot::with_records(json!([{
"id": "1234",
"type": "data",
"last_modified": 15,
"attachment": {
"filename": "data-1.json",
"mimetype": "application/json",
"location": "data-1.json",
"hash": "",
"size": 0,
},
}, {
"id": "12345",
"type": "data",
"last_modified": 15,
"attachment": {
"filename": "data-2.json",
"mimetype": "application/json",
"location": "data-2.json",
"hash": "",
"size": 0,
},
}]))?
.with_data(
"data-1.json",
json!([{
"id": 0,
"advertiser": "Los Pollos Hermanos",
"iab_category": "8 - Food & Drink",
"keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"],
"title": "Los Pollos Hermanos - Albuquerque",
"url": "https://www.lph-nm.biz",
"icon": "5678",
"impression_url": "https://example.com/impression_url",
"click_url": "https://example.com/click_url",
"score": 0.3
}])
)?
.with_data("data-2.json", json!([{
"id": 1,
"advertiser": "Good Place Eats",
"iab_category": "8 - Food & Drink",
"keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"],
"title": "Lasagna Come Out Tomorrow",
"url": "https://www.lasagna.restaurant",
"icon": "2",
"impression_url": "https://example.com/impression_url",
"click_url": "https://example.com/click_url"
}]),
)?);
store.ingest(SuggestIngestionConstraints {
empty_only: true,
..SuggestIngestionConstraints::default()
})?;
store.dbs()?.reader.read(|dao| {
expect![[r#"
[]
"#]]
.assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
keyword: "la".into(),
providers: vec![SuggestionProvider::Amp],
limit: None,
})?);
Ok(())
})?;
Ok(()) Ok(())
} }
@ -2212,6 +2371,7 @@ mod tests {
store.ingest(SuggestIngestionConstraints { store.ingest(SuggestIngestionConstraints {
max_suggestions: Some(max_suggestions), max_suggestions: Some(max_suggestions),
providers: Some(vec![SuggestionProvider::Amp]), providers: Some(vec![SuggestionProvider::Amp]),
..SuggestIngestionConstraints::default()
})?; })?;
let actual_limit = store let actual_limit = store
.settings_client .settings_client
@ -5201,6 +5361,7 @@ mod tests {
let constraints = SuggestIngestionConstraints { let constraints = SuggestIngestionConstraints {
max_suggestions: Some(100), max_suggestions: Some(100),
providers: Some(vec![SuggestionProvider::Amp, SuggestionProvider::Pocket]), providers: Some(vec![SuggestionProvider::Amp, SuggestionProvider::Pocket]),
..SuggestIngestionConstraints::default()
}; };
store.ingest(constraints)?; store.ingest(constraints)?;

View file

@ -106,6 +106,14 @@ dictionary SuggestionQuery {
dictionary SuggestIngestionConstraints { dictionary SuggestIngestionConstraints {
u64? max_suggestions = null; u64? max_suggestions = null;
sequence<SuggestionProvider>? providers = null; sequence<SuggestionProvider>? providers = null;
// Only ingest if the table `suggestions` is empty.
//
// This is intended to handle periodic updates. Consumers can schedule an ingest with
// `empty_only=true` on startup and a regular ingest with `empty_only=false` to run on a long periodic schedule (maybe
// once a day). This allows ingestion to normally be run at a slow, periodic rate. However, if
// there is a schema upgrade that causes the database to be thrown away, then the
// `empty_only=true` ingestion that runs on startup will repopulate it.
boolean empty_only = false;
}; };
dictionary SuggestGlobalConfig { dictionary SuggestGlobalConfig {

View file

@ -11,6 +11,7 @@ ChromeUtils.defineESModuleGetters(lazy, {
"resource://gre/modules/contentrelevancy/private/InputUtils.sys.mjs", "resource://gre/modules/contentrelevancy/private/InputUtils.sys.mjs",
NimbusFeatures: "resource://nimbus/ExperimentAPI.sys.mjs", NimbusFeatures: "resource://nimbus/ExperimentAPI.sys.mjs",
RelevancyStore: "resource://gre/modules/RustRelevancy.sys.mjs", RelevancyStore: "resource://gre/modules/RustRelevancy.sys.mjs",
InterestVector: "resource://gre/modules/RustRelevancy.sys.mjs",
}); });
XPCOMUtils.defineLazyServiceGetter( XPCOMUtils.defineLazyServiceGetter(
@ -40,6 +41,7 @@ const NIMBUS_VARIABLE_ENABLED = "enabled";
const NIMBUS_VARIABLE_MAX_INPUT_URLS = "maxInputUrls"; const NIMBUS_VARIABLE_MAX_INPUT_URLS = "maxInputUrls";
const NIMBUS_VARIABLE_MIN_INPUT_URLS = "minInputUrls"; const NIMBUS_VARIABLE_MIN_INPUT_URLS = "minInputUrls";
const NIMBUS_VARIABLE_TIMER_INTERVAL = "timerInterval"; const NIMBUS_VARIABLE_TIMER_INTERVAL = "timerInterval";
const NIMBUS_VARIABLE_INGEST_ENABLED = "ingestEnabled";
ChromeUtils.defineLazyGetter(lazy, "log", () => { ChromeUtils.defineLazyGetter(lazy, "log", () => {
return console.createInstance({ return console.createInstance({
@ -243,18 +245,21 @@ class RelevancyManager {
lazy.log.info("Starting interest classification"); lazy.log.info("Starting interest classification");
timerId = Glean.relevancyClassify.duration.start(); timerId = Glean.relevancyClassify.duration.start();
await this.#doClassificationHelper(urls); const interestVector = await this.#doClassificationHelper(urls);
const sortedVector = Object.entries(interestVector).sort(
([, a], [, b]) => b - a // descending
);
lazy.log.info(`Classification results: ${JSON.stringify(sortedVector)}`);
Glean.relevancyClassify.duration.stopAndAccumulate(timerId); Glean.relevancyClassify.duration.stopAndAccumulate(timerId);
Glean.relevancyClassify.succeed.record({ Glean.relevancyClassify.succeed.record({
input_size: urls.length, input_size: urls.length,
// TODO(nanj): Fill out the actual counters once the classification is enabled. input_classified_size: sortedVector.reduce((acc, [, v]) => acc + v, 0),
input_classified_size: 0, input_inconclusive_size: interestVector.inconclusive,
input_inconclusive_size: 0, output_interest_size: sortedVector.filter(([, v]) => v != 0).length,
output_interest_size: 0, interest_top_1_hits: sortedVector[0][1],
interest_top_1_hits: 0, interest_top_2_hits: sortedVector[1][1],
interest_top_2_hits: 0, interest_top_3_hits: sortedVector[2][1],
interest_top_3_hits: 0,
}); });
} catch (error) { } catch (error) {
let reason; let reason;
@ -290,28 +295,48 @@ class RelevancyManager {
* *
* @param {Array} urls * @param {Array} urls
* An array of URLs. * An array of URLs.
* @returns {InterestVector}
* An interest vector.
* @throws {StoreNotAvailableError} * @throws {StoreNotAvailableError}
* Thrown when the store became unavailable (i.e. set to null elsewhere). * Thrown when the store became unavailable (i.e. set to null elsewhere).
* @throws {RelevancyAPIError} * @throws {RelevancyAPIError}
* Thrown for other API errors on the store. * Thrown for other API errors on the store.
*/ */
async #doClassificationHelper(urls) { async #doClassificationHelper(urls) {
// The following logs are unnecessary, only used to suppress the linting error.
// TODO(nanj): delete me once the following TODO is done.
if (!this.#store) {
lazy.log.error("#store became null, aborting interest classification");
}
lazy.log.info("Classification input: " + urls); lazy.log.info("Classification input: " + urls);
// TODO(nanj): uncomment the following once `ingest()` is implemented. let interestVector = new lazy.InterestVector({
// await this.#store.ingest(urls); animals: 0,
} arts: 0,
autos: 0,
business: 0,
career: 0,
education: 0,
fashion: 0,
finance: 0,
food: 0,
government: 0,
hobbies: 0,
home: 0,
news: 0,
realEstate: 0,
society: 0,
sports: 0,
tech: 0,
travel: 0,
inconclusive: 0,
});
/** if (
* Exposed for testing. lazy.NimbusFeatures.contentRelevancy.getVariable(
*/ NIMBUS_VARIABLE_INGEST_ENABLED
async _test_doClassificationHelper(urls) { ) ??
await this.#doClassificationHelper(urls); false
) {
interestVector = await this.#store.ingest(urls);
}
return interestVector;
} }
/** /**

View file

@ -41,6 +41,7 @@ add_task(async function test_NimbusIntegration_enable() {
maxInputUrls: 3, maxInputUrls: 3,
// Set the timer interval to 0 will trigger the timer right away. // Set the timer interval to 0 will trigger the timer right away.
timerInterval: 0, timerInterval: 0,
ingestEnabled: false,
}, },
}); });
@ -73,6 +74,7 @@ add_task(async function test_NimbusIntegration_disable() {
maxInputUrls: 3, maxInputUrls: 3,
// Set the timer interval to 0 will trigger the timer right away. // Set the timer interval to 0 will trigger the timer right away.
timerInterval: 0, timerInterval: 0,
ingestEnabled: false,
}, },
}); });

View file

@ -100,24 +100,6 @@ add_task(async function test_call_disable_twice() {
Services.prefs.clearUserPref(PREF_CONTENT_RELEVANCY_ENABLED); Services.prefs.clearUserPref(PREF_CONTENT_RELEVANCY_ENABLED);
}); });
add_task(async function test_doClassificationHelper() {
Services.prefs.setBoolPref(PREF_CONTENT_RELEVANCY_ENABLED, true);
await TestUtils.waitForCondition(() => ContentRelevancyManager._isStoreReady);
await ContentRelevancyManager._test_doClassificationHelper([]);
// Disable it to reset the store.
Services.prefs.setBoolPref(PREF_CONTENT_RELEVANCY_ENABLED, false);
await TestUtils.waitForTick();
await Assert.rejects(
ContentRelevancyManager._test_doClassificationHelper([]),
/Store is not available/,
"Should throw with an unset store"
);
Services.prefs.clearUserPref(PREF_CONTENT_RELEVANCY_ENABLED);
});
/** /**
* Sets up the update timer manager for testing: makes it fire more often, * Sets up the update timer manager for testing: makes it fire more often,
* removes all existing timers, and initializes it for testing. The body of this * removes all existing timers, and initializes it for testing. The body of this

View file

@ -7,10 +7,6 @@ const { ContentRelevancyManager } = ChromeUtils.importESModule(
"resource://gre/modules/ContentRelevancyManager.sys.mjs" "resource://gre/modules/ContentRelevancyManager.sys.mjs"
); );
const { TestUtils } = ChromeUtils.importESModule(
"resource://testing-common/TestUtils.sys.mjs"
);
const PREF_CONTENT_RELEVANCY_ENABLED = "toolkit.contentRelevancy.enabled"; const PREF_CONTENT_RELEVANCY_ENABLED = "toolkit.contentRelevancy.enabled";
add_setup(async function setup() { add_setup(async function setup() {
@ -83,39 +79,3 @@ add_task(async function test_classify_fail_case1() {
"Should not record the duration" "Should not record the duration"
); );
}); });
/**
* Test classification metrics - fail - store-not-ready.
*/
add_task(async function test_classify_fail_case2() {
Services.fog.testResetFOG();
// Toggle the pref to disable the manager and nullify the store.
Services.prefs.setBoolPref(PREF_CONTENT_RELEVANCY_ENABLED, false);
await TestUtils.waitForTick();
await TestUtils.waitForCondition(
() => !ContentRelevancyManager.shouldEnable,
"Should be disabled via pref"
);
Assert.equal(null, Glean.relevancyClassify.fail.testGetValue());
Assert.equal(null, Glean.relevancyClassify.duration.testGetValue());
await ContentRelevancyManager._test_doClassification();
Assert.deepEqual(
{
reason: "store-not-ready",
},
Glean.relevancyClassify.fail.testGetValue()[0].extra,
"Should record the fail event"
);
Assert.equal(
null,
Glean.relevancyClassify.duration.testGetValue(),
"Should not record the duration"
);
Services.prefs.setBoolPref(PREF_CONTENT_RELEVANCY_ENABLED, true);
});

View file

@ -2701,6 +2701,10 @@ contentRelevancy:
setPref: setPref:
branch: user branch: user
pref: toolkit.contentRelevancy.timerInterval pref: toolkit.contentRelevancy.timerInterval
ingestEnabled:
description: Enable the ingestion through the Rust component
type: boolean
fallbackPref: toolkit.contentRelevancy.ingestEnabled
tabPreview: tabPreview:
description: Prefs to control Tab Previews description: Prefs to control Tab Previews

View file

@ -322,7 +322,7 @@ export class RelevancyStore {
*/ */
static init(dbpath) { static init(dbpath) {
const liftResult = (result) => FfiConverterTypeRelevancyStore.lift(result); const liftResult = (result) => FfiConverterTypeRelevancyStore.lift(result);
const liftError = (data) => FfiConverterTypeRelevancyApiError.lift(data); const liftError = null;
const functionCall = () => { const functionCall = () => {
try { try {
FfiConverterString.checkType(dbpath) FfiConverterString.checkType(dbpath)
@ -368,7 +368,7 @@ export class RelevancyStore {
} }
ingest(topUrls) { ingest(topUrls) {
const liftResult = (result) => undefined; const liftResult = (result) => FfiConverterTypeInterestVector.lift(result);
const liftError = (data) => FfiConverterTypeRelevancyApiError.lift(data); const liftError = (data) => FfiConverterTypeRelevancyApiError.lift(data);
const functionCall = () => { const functionCall = () => {
try { try {

View file

@ -915,7 +915,7 @@ export class FfiConverterTypeSuggestGlobalConfig extends FfiConverterArrayBuffer
} }
export class SuggestIngestionConstraints { export class SuggestIngestionConstraints {
constructor({ maxSuggestions = null, providers = null } = {}) { constructor({ maxSuggestions = null, providers = null, emptyOnly = false } = {}) {
try { try {
FfiConverterOptionalu64.checkType(maxSuggestions) FfiConverterOptionalu64.checkType(maxSuggestions)
} catch (e) { } catch (e) {
@ -932,13 +932,23 @@ export class SuggestIngestionConstraints {
} }
throw e; throw e;
} }
try {
FfiConverterBool.checkType(emptyOnly)
} catch (e) {
if (e instanceof UniFFITypeError) {
e.addItemDescriptionPart("emptyOnly");
}
throw e;
}
this.maxSuggestions = maxSuggestions; this.maxSuggestions = maxSuggestions;
this.providers = providers; this.providers = providers;
this.emptyOnly = emptyOnly;
} }
equals(other) { equals(other) {
return ( return (
this.maxSuggestions == other.maxSuggestions && this.maxSuggestions == other.maxSuggestions &&
this.providers == other.providers this.providers == other.providers &&
this.emptyOnly == other.emptyOnly
) )
} }
} }
@ -949,17 +959,20 @@ export class FfiConverterTypeSuggestIngestionConstraints extends FfiConverterArr
return new SuggestIngestionConstraints({ return new SuggestIngestionConstraints({
maxSuggestions: FfiConverterOptionalu64.read(dataStream), maxSuggestions: FfiConverterOptionalu64.read(dataStream),
providers: FfiConverterOptionalSequenceTypeSuggestionProvider.read(dataStream), providers: FfiConverterOptionalSequenceTypeSuggestionProvider.read(dataStream),
emptyOnly: FfiConverterBool.read(dataStream),
}); });
} }
static write(dataStream, value) { static write(dataStream, value) {
FfiConverterOptionalu64.write(dataStream, value.maxSuggestions); FfiConverterOptionalu64.write(dataStream, value.maxSuggestions);
FfiConverterOptionalSequenceTypeSuggestionProvider.write(dataStream, value.providers); FfiConverterOptionalSequenceTypeSuggestionProvider.write(dataStream, value.providers);
FfiConverterBool.write(dataStream, value.emptyOnly);
} }
static computeSize(value) { static computeSize(value) {
let totalSize = 0; let totalSize = 0;
totalSize += FfiConverterOptionalu64.computeSize(value.maxSuggestions); totalSize += FfiConverterOptionalu64.computeSize(value.maxSuggestions);
totalSize += FfiConverterOptionalSequenceTypeSuggestionProvider.computeSize(value.providers); totalSize += FfiConverterOptionalSequenceTypeSuggestionProvider.computeSize(value.providers);
totalSize += FfiConverterBool.computeSize(value.emptyOnly);
return totalSize return totalSize
} }
@ -984,6 +997,14 @@ export class FfiConverterTypeSuggestIngestionConstraints extends FfiConverterArr
} }
throw e; throw e;
} }
try {
FfiConverterBool.checkType(value.emptyOnly);
} catch (e) {
if (e instanceof UniFFITypeError) {
e.addItemDescriptionPart(".emptyOnly");
}
throw e;
}
} }
} }

View file

@ -29,7 +29,7 @@ extern "C" {
void * uniffi_relevancy_fn_constructor_relevancystore_new(RustBuffer, RustCallStatus*); void * uniffi_relevancy_fn_constructor_relevancystore_new(RustBuffer, RustCallStatus*);
RustBuffer uniffi_relevancy_fn_method_relevancystore_calculate_metrics(void *, RustCallStatus*); RustBuffer uniffi_relevancy_fn_method_relevancystore_calculate_metrics(void *, RustCallStatus*);
void uniffi_relevancy_fn_method_relevancystore_close(void *, RustCallStatus*); void uniffi_relevancy_fn_method_relevancystore_close(void *, RustCallStatus*);
void uniffi_relevancy_fn_method_relevancystore_ingest(void *, RustBuffer, RustCallStatus*); RustBuffer uniffi_relevancy_fn_method_relevancystore_ingest(void *, RustBuffer, RustCallStatus*);
void uniffi_relevancy_fn_method_relevancystore_interrupt(void *, RustCallStatus*); void uniffi_relevancy_fn_method_relevancystore_interrupt(void *, RustCallStatus*);
RustBuffer uniffi_relevancy_fn_method_relevancystore_user_interest_vector(void *, RustCallStatus*); RustBuffer uniffi_relevancy_fn_method_relevancystore_user_interest_vector(void *, RustCallStatus*);
void * uniffi_remote_settings_fn_clone_remotesettings(void *, RustCallStatus*); void * uniffi_remote_settings_fn_clone_remotesettings(void *, RustCallStatus*);
@ -144,7 +144,7 @@ Maybe<already_AddRefed<Promise>> UniFFICallAsync(const GlobalObject& aGlobal, ui
return Some(CallHandler::CallAsync(uniffi_relevancy_fn_method_relevancystore_close, aGlobal, aArgs, "uniffi_relevancy_fn_method_relevancystore_close: "_ns, aError)); return Some(CallHandler::CallAsync(uniffi_relevancy_fn_method_relevancystore_close, aGlobal, aArgs, "uniffi_relevancy_fn_method_relevancystore_close: "_ns, aError));
} }
case 4: { // relevancy:uniffi_relevancy_fn_method_relevancystore_ingest case 4: { // relevancy:uniffi_relevancy_fn_method_relevancystore_ingest
using CallHandler = ScaffoldingCallHandler<ScaffoldingConverter<void>, ScaffoldingObjectConverter<&kRelevancyRelevancyStorePointerType>, ScaffoldingConverter<RustBuffer>>; using CallHandler = ScaffoldingCallHandler<ScaffoldingConverter<RustBuffer>, ScaffoldingObjectConverter<&kRelevancyRelevancyStorePointerType>, ScaffoldingConverter<RustBuffer>>;
return Some(CallHandler::CallAsync(uniffi_relevancy_fn_method_relevancystore_ingest, aGlobal, aArgs, "uniffi_relevancy_fn_method_relevancystore_ingest: "_ns, aError)); return Some(CallHandler::CallAsync(uniffi_relevancy_fn_method_relevancystore_ingest, aGlobal, aArgs, "uniffi_relevancy_fn_method_relevancystore_ingest: "_ns, aError));
} }
case 5: { // relevancy:uniffi_relevancy_fn_method_relevancystore_interrupt case 5: { // relevancy:uniffi_relevancy_fn_method_relevancystore_interrupt
@ -354,7 +354,7 @@ bool UniFFICallSync(const GlobalObject& aGlobal, uint64_t aId, const Sequence<Un
return true; return true;
} }
case 4: { // relevancy:uniffi_relevancy_fn_method_relevancystore_ingest case 4: { // relevancy:uniffi_relevancy_fn_method_relevancystore_ingest
using CallHandler = ScaffoldingCallHandler<ScaffoldingConverter<void>, ScaffoldingObjectConverter<&kRelevancyRelevancyStorePointerType>, ScaffoldingConverter<RustBuffer>>; using CallHandler = ScaffoldingCallHandler<ScaffoldingConverter<RustBuffer>, ScaffoldingObjectConverter<&kRelevancyRelevancyStorePointerType>, ScaffoldingConverter<RustBuffer>>;
CallHandler::CallSync(uniffi_relevancy_fn_method_relevancystore_ingest, aGlobal, aArgs, aReturnValue, "uniffi_relevancy_fn_method_relevancystore_ingest: "_ns, aError); CallHandler::CallSync(uniffi_relevancy_fn_method_relevancystore_ingest, aGlobal, aArgs, aReturnValue, "uniffi_relevancy_fn_method_relevancystore_ingest: "_ns, aError);
return true; return true;
} }