forked from mirrors/gecko-dev
Bug 1895888 - Vendor fully-implemented Rust relevancy component r=bdk
Differential Revision: https://phabricator.services.mozilla.com/D209964
This commit is contained in:
parent
2f9ab8fab6
commit
eb54ba3346
27 changed files with 945 additions and 326 deletions
|
|
@ -60,9 +60,9 @@ git = "https://github.com/mozilla-spidermonkey/jsparagus"
|
|||
rev = "61f399c53a641ebd3077c1f39f054f6d396a633c"
|
||||
replace-with = "vendored-sources"
|
||||
|
||||
[source."git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434"]
|
||||
[source."git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a"]
|
||||
git = "https://github.com/mozilla/application-services"
|
||||
rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434"
|
||||
rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a"
|
||||
replace-with = "vendored-sources"
|
||||
|
||||
[source."git+https://github.com/mozilla/audioipc?rev=409e11f8de6288e9ddfe269654523735302e59e6"]
|
||||
|
|
|
|||
31
Cargo.lock
generated
31
Cargo.lock
generated
|
|
@ -1680,7 +1680,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "error-support"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
|
||||
dependencies = [
|
||||
"error-support-macros",
|
||||
"lazy_static",
|
||||
|
|
@ -1692,7 +1692,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "error-support-macros"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
@ -2965,7 +2965,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "interrupt-support"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"parking_lot",
|
||||
|
|
@ -4173,7 +4173,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "nss_build_common"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
|
||||
|
||||
[[package]]
|
||||
name = "nsstring"
|
||||
|
|
@ -4827,14 +4827,19 @@ checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
|
|||
[[package]]
|
||||
name = "relevancy"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"base64 0.21.3",
|
||||
"error-support",
|
||||
"interrupt-support",
|
||||
"log",
|
||||
"md-5",
|
||||
"parking_lot",
|
||||
"remote_settings",
|
||||
"rusqlite",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sql-support",
|
||||
"thiserror",
|
||||
"uniffi",
|
||||
|
|
@ -4844,7 +4849,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "remote_settings"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
|
||||
dependencies = [
|
||||
"parking_lot",
|
||||
"serde",
|
||||
|
|
@ -5372,7 +5377,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "sql-support"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
|
||||
dependencies = [
|
||||
"ffi-support",
|
||||
"interrupt-support",
|
||||
|
|
@ -5554,7 +5559,7 @@ checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
|
|||
[[package]]
|
||||
name = "suggest"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
|
|
@ -5603,7 +5608,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "sync-guid"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
|
||||
dependencies = [
|
||||
"base64 0.21.3",
|
||||
"rand",
|
||||
|
|
@ -5614,7 +5619,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "sync15"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"error-support",
|
||||
|
|
@ -5646,7 +5651,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "tabs"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"error-support",
|
||||
|
|
@ -6319,7 +6324,7 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
|||
[[package]]
|
||||
name = "viaduct"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
|
||||
dependencies = [
|
||||
"ffi-support",
|
||||
"log",
|
||||
|
|
@ -6467,7 +6472,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "webext-storage"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=bf37a5174a1dcb7a890dc35386d58f9b77f82434#bf37a5174a1dcb7a890dc35386d58f9b77f82434"
|
||||
source = "git+https://github.com/mozilla/application-services?rev=e0563d725f852f617878ecc13a03cdf50c85cd5a#e0563d725f852f617878ecc13a03cdf50c85cd5a"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"error-support",
|
||||
|
|
|
|||
16
Cargo.toml
16
Cargo.toml
|
|
@ -223,14 +223,14 @@ warp = { git = "https://github.com/seanmonstar/warp", rev = "9d081461ae1167eb321
|
|||
malloc_size_of_derive = { path = "xpcom/rust/malloc_size_of_derive" }
|
||||
|
||||
# application-services overrides to make updating them all simpler.
|
||||
interrupt-support = { git = "https://github.com/mozilla/application-services", rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434" }
|
||||
relevancy = { git = "https://github.com/mozilla/application-services", rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434" }
|
||||
sql-support = { git = "https://github.com/mozilla/application-services", rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434" }
|
||||
suggest = { git = "https://github.com/mozilla/application-services", rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434" }
|
||||
sync15 = { git = "https://github.com/mozilla/application-services", rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434" }
|
||||
tabs = { git = "https://github.com/mozilla/application-services", rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434" }
|
||||
viaduct = { git = "https://github.com/mozilla/application-services", rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434" }
|
||||
webext-storage = { git = "https://github.com/mozilla/application-services", rev = "bf37a5174a1dcb7a890dc35386d58f9b77f82434" }
|
||||
interrupt-support = { git = "https://github.com/mozilla/application-services", rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a" }
|
||||
relevancy = { git = "https://github.com/mozilla/application-services", rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a" }
|
||||
sql-support = { git = "https://github.com/mozilla/application-services", rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a" }
|
||||
suggest = { git = "https://github.com/mozilla/application-services", rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a" }
|
||||
sync15 = { git = "https://github.com/mozilla/application-services", rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a" }
|
||||
tabs = { git = "https://github.com/mozilla/application-services", rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a" }
|
||||
viaduct = { git = "https://github.com/mozilla/application-services", rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a" }
|
||||
webext-storage = { git = "https://github.com/mozilla/application-services", rev = "e0563d725f852f617878ecc13a03cdf50c85cd5a" }
|
||||
|
||||
# Patch mio 0.8.8 to use windows-sys 0.52 (backport https://github.com/tokio-rs/mio/commit/eea9e3e0c469480e5c59c01e6c3c7e5fd88f0848)
|
||||
mio_0_8 = { package = "mio", git = "https://github.com/glandium/mio", rev = "9a2ef335c366044ffe73b1c4acabe50a1daefe05" }
|
||||
|
|
|
|||
|
|
@ -3047,3 +3047,5 @@ pref("startup.homepage_override_nimbus_maxVersion", "");
|
|||
|
||||
// Pref to enable the content relevancy feature.
|
||||
pref("toolkit.contentRelevancy.enabled", false);
|
||||
// Pref to enable the ingestion through the Rust component.
|
||||
pref("toolkit.contentRelevancy.ingestEnabled", false);
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
{"files":{"Cargo.toml":"8b8d81c6af8ab402f8febf103e10917a55886cacb01d0448c4426a2b54d244d0","build.rs":"a562bfe527d21c4e8a1a44b892defa83cdff141ec5dd51ed6f3862330e50ddd7","src/bin/generate-test-data.rs":"7f1c9dc445418c7627f89d1f2aa8e550d0f85b3d1f05edb7c378ab9441714f1f","src/db.rs":"9470c4566fc6296571b35d493f752d8a1f3c1fd8f7f420007ee3fa3e762af92b","src/error.rs":"00a10d47c9cfd25c4104174ec07eca6a08103564cb1b2c4961739f17f2892fb2","src/interest.rs":"e28b51c9692905ca87e6ab23decf1c1b3897bf29cb3d0d61f71213553b561dcc","src/lib.rs":"a42ffd826fd38a5f9436d9de00fd7b548e233a39063fbc030cae10052e0b4253","src/populate_interests.rs":"96c825796c6cfb7b1bb3a11c6d1b9c3639107943f5d35a259e195fec15aeef4e","src/relevancy.udl":"3de62ea53b4f34c11ff94c782b8389d58525ca40bb292b4b81370025813def5e","src/schema.rs":"f782c712f10c4f1af2f9e1424d6b52f59a2bacfcc452a8feb763f36478f5dd5d","src/url_hash.rs":"5619a249d471e7b642d889bad09e93212559c8b947010d49492c1423da2b310e","test-data":"1ef2cd092d59e7e126cd4a514af983d449ed9f9c98708702fd237464a76c2b5e"},"package":null}
|
||||
{"files":{"Cargo.toml":"2b7bf33e20b6aa768dd18619845e9d5d22235d86f770e94b250ed0052662ce2d","build.rs":"a562bfe527d21c4e8a1a44b892defa83cdff141ec5dd51ed6f3862330e50ddd7","src/bin/generate-test-data.rs":"7f1c9dc445418c7627f89d1f2aa8e550d0f85b3d1f05edb7c378ab9441714f1f","src/db.rs":"7ca5688c42d44ad6e5320208257d131c5c744be47a1cfe3e1380147abf2aadc3","src/error.rs":"0fe48e211dffb2010f732672c38e1c79b1995df3e70b06398ed8ac43d326c1b1","src/ingest.rs":"d3f528c1d62b4b6af404bb14cb0d431f8d523911ada09e4e1db5836b6cf44e04","src/interest.rs":"adbaa1e0324c7bb32b023f105b45499390a1a83973d1a8c7d727a661a25cc259","src/lib.rs":"29ce35211c9d94d561d62d7e8ef57fc56cc90a9ba42b88b54c2f4c9236a8cd4d","src/relevancy.udl":"b551e7476f30dccdc74cbf2f38fc3b87a3a7d0ec5dfa6c2ea4417b18fbc7475c","src/rs.rs":"b98091d0adca809d8fef38eb5394f885e04d4d382b7c8abd7bd0fe53f64e7bd6","src/schema.rs":"f782c712f10c4f1af2f9e1424d6b52f59a2bacfcc452a8feb763f36478f5dd5d","src/url_hash.rs":"2e908316fb70923644d1990dbf470d69ce2f5e99b0c5c3d95ec691590be8ffa5","test-data":"1ef2cd092d59e7e126cd4a514af983d449ed9f9c98708702fd237464a76c2b5e"},"package":null}
|
||||
10
third_party/rust/relevancy/Cargo.toml
vendored
10
third_party/rust/relevancy/Cargo.toml
vendored
|
|
@ -25,9 +25,12 @@ license = "MPL-2.0"
|
|||
name = "generate-test-data"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
base64 = "0.21.2"
|
||||
log = "0.4"
|
||||
md-5 = "0.10"
|
||||
parking_lot = ">=0.11,<=0.12"
|
||||
serde_json = "1"
|
||||
thiserror = "1.0"
|
||||
uniffi = "0.27.1"
|
||||
url = "2.5"
|
||||
|
|
@ -38,10 +41,17 @@ path = "../support/error"
|
|||
[dependencies.interrupt-support]
|
||||
path = "../support/interrupt"
|
||||
|
||||
[dependencies.remote_settings]
|
||||
path = "../remote_settings"
|
||||
|
||||
[dependencies.rusqlite]
|
||||
version = "0.30.0"
|
||||
features = ["bundled"]
|
||||
|
||||
[dependencies.serde]
|
||||
version = "1"
|
||||
features = ["derive"]
|
||||
|
||||
[dependencies.sql-support]
|
||||
path = "../support/sql"
|
||||
|
||||
|
|
|
|||
10
third_party/rust/relevancy/src/db.rs
vendored
10
third_party/rust/relevancy/src/db.rs
vendored
|
|
@ -20,7 +20,7 @@ pub struct RelevancyDb {
|
|||
}
|
||||
|
||||
impl RelevancyDb {
|
||||
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
|
||||
pub fn new(path: impl AsRef<Path>) -> Self {
|
||||
// Note: use `SQLITE_OPEN_READ_WRITE` for both read and write connections.
|
||||
// Even if we're opening a read connection, we may need to do a write as part of the
|
||||
// initialization process.
|
||||
|
|
@ -31,10 +31,10 @@ impl RelevancyDb {
|
|||
| OpenFlags::SQLITE_OPEN_NO_MUTEX
|
||||
| OpenFlags::SQLITE_OPEN_CREATE
|
||||
| OpenFlags::SQLITE_OPEN_READ_WRITE;
|
||||
Ok(Self {
|
||||
Self {
|
||||
reader: LazyDb::new(path.as_ref(), db_open_flags, RelevancyConnectionInitializer),
|
||||
writer: LazyDb::new(path.as_ref(), db_open_flags, RelevancyConnectionInitializer),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub fn close(&self) {
|
||||
|
|
@ -52,7 +52,7 @@ impl RelevancyDb {
|
|||
use std::sync::atomic::{AtomicU32, Ordering};
|
||||
static COUNTER: AtomicU32 = AtomicU32::new(0);
|
||||
let count = COUNTER.fetch_add(1, Ordering::Relaxed);
|
||||
Self::new(format!("file:test{count}.sqlite?mode=memory&cache=shared")).unwrap()
|
||||
Self::new(format!("file:test{count}.sqlite?mode=memory&cache=shared"))
|
||||
}
|
||||
|
||||
/// Accesses the relevancy database in a transaction for reading.
|
||||
|
|
@ -118,7 +118,7 @@ impl<'a> RelevancyDao<'a> {
|
|||
",
|
||||
)?;
|
||||
let interests = stmt.query_and_then((hash,), |row| -> Result<Interest> {
|
||||
Ok(row.get::<_, u32>(0)?.into())
|
||||
row.get::<_, u32>(0)?.try_into()
|
||||
})?;
|
||||
|
||||
let mut interest_vec = InterestVector::default();
|
||||
|
|
|
|||
12
third_party/rust/relevancy/src/error.rs
vendored
12
third_party/rust/relevancy/src/error.rs
vendored
|
|
@ -26,6 +26,18 @@ pub enum Error {
|
|||
|
||||
#[error("Interrupted")]
|
||||
Interrupted(#[from] interrupt_support::Interrupted),
|
||||
|
||||
#[error("Invalid interest code: {0}")]
|
||||
InvalidInterestCode(u32),
|
||||
|
||||
#[error("Remote Setting Error: {0}")]
|
||||
RemoteSettingsError(#[from] remote_settings::RemoteSettingsError),
|
||||
|
||||
#[error("Serde Json Error: {0}")]
|
||||
SerdeJsonError(#[from] serde_json::Error),
|
||||
|
||||
#[error("Base64 Decode Error: {0}")]
|
||||
Base64DecodeError(String),
|
||||
}
|
||||
|
||||
/// Result enum for the public API
|
||||
|
|
|
|||
394
third_party/rust/relevancy/src/ingest.rs
vendored
Normal file
394
third_party/rust/relevancy/src/ingest.rs
vendored
Normal file
|
|
@ -0,0 +1,394 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
use crate::db::RelevancyDao;
|
||||
use crate::rs::{
|
||||
RelevancyAttachmentData, RelevancyRecord, RelevancyRemoteSettingsClient,
|
||||
REMOTE_SETTINGS_COLLECTION,
|
||||
};
|
||||
use crate::url_hash::UrlHash;
|
||||
use crate::{Error, Interest, RelevancyDb, Result};
|
||||
use base64::{engine::general_purpose::STANDARD, Engine};
|
||||
use remote_settings::{Client, RemoteSettingsConfig, RemoteSettingsRecord, RemoteSettingsServer};
|
||||
|
||||
// Number of rows to write when inserting interest data before checking for interruption
|
||||
const WRITE_CHUNK_SIZE: usize = 100;
|
||||
|
||||
pub fn ensure_interest_data_populated(db: &RelevancyDb) -> Result<()> {
|
||||
if !db.read(|dao| dao.need_to_load_url_interests())? {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
match fetch_interest_data() {
|
||||
Ok(data) => {
|
||||
db.read_write(move |dao| insert_interest_data(data, dao))?;
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("error fetching interest data: {e}");
|
||||
return Err(Error::FetchInterestDataError);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn fetch_interest_data() -> Result<Vec<(Interest, UrlHash)>> {
|
||||
let rs = Client::new(RemoteSettingsConfig {
|
||||
collection_name: REMOTE_SETTINGS_COLLECTION.to_string(),
|
||||
server: Some(RemoteSettingsServer::Prod),
|
||||
server_url: None,
|
||||
bucket_name: None,
|
||||
})?;
|
||||
fetch_interest_data_inner(rs)
|
||||
}
|
||||
|
||||
/// Fetch the interest data
|
||||
fn fetch_interest_data_inner(
|
||||
rs: impl RelevancyRemoteSettingsClient,
|
||||
) -> Result<Vec<(Interest, UrlHash)>> {
|
||||
let remote_settings_response = rs.get_records()?;
|
||||
let mut result = vec![];
|
||||
|
||||
for record in remote_settings_response.records {
|
||||
let attachment_data = match &record.attachment {
|
||||
None => return Err(Error::FetchInterestDataError),
|
||||
Some(a) => rs.get_attachment(&a.location)?,
|
||||
};
|
||||
let interest = get_interest(&record)?;
|
||||
let urls = get_hash_urls(attachment_data)?;
|
||||
result.extend(std::iter::repeat(interest).zip(urls));
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn get_hash_urls(attachment_data: Vec<u8>) -> Result<Vec<UrlHash>> {
|
||||
let mut hash_urls = vec![];
|
||||
|
||||
let parsed_attachment_data =
|
||||
serde_json::from_slice::<Vec<RelevancyAttachmentData>>(&attachment_data)?;
|
||||
|
||||
for attachment_data in parsed_attachment_data {
|
||||
let hash_url = STANDARD
|
||||
.decode(attachment_data.domain)
|
||||
.map_err(|_| Error::Base64DecodeError("Invalid base64 error".to_string()))?;
|
||||
let url_hash = hash_url.try_into().map_err(|_| {
|
||||
Error::Base64DecodeError("Base64 string has wrong number of bytes".to_string())
|
||||
})?;
|
||||
hash_urls.push(url_hash);
|
||||
}
|
||||
Ok(hash_urls)
|
||||
}
|
||||
|
||||
/// Extract Interest from the record info
|
||||
fn get_interest(record: &RemoteSettingsRecord) -> Result<Interest> {
|
||||
let record_fields: RelevancyRecord =
|
||||
serde_json::from_value(serde_json::Value::Object(record.fields.clone()))?;
|
||||
let custom_details = record_fields.record_custom_details;
|
||||
let category_code = custom_details.category_to_domains.category_code;
|
||||
Interest::try_from(category_code as u32)
|
||||
}
|
||||
|
||||
/// Insert Interests into Db
|
||||
fn insert_interest_data(data: Vec<(Interest, UrlHash)>, dao: &mut RelevancyDao) -> Result<()> {
|
||||
for chunk in data.chunks(WRITE_CHUNK_SIZE) {
|
||||
dao.err_if_interrupted()?;
|
||||
for (interest, hash_url) in chunk {
|
||||
dao.add_url_interest(*hash_url, *interest)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
|
||||
use std::{cell::RefCell, collections::HashMap};
|
||||
|
||||
use anyhow::Context;
|
||||
use remote_settings::RemoteSettingsResponse;
|
||||
use serde_json::json;
|
||||
|
||||
use super::*;
|
||||
use crate::{rs::RelevancyRemoteSettingsClient, url_hash::hash_url, InterestVector};
|
||||
|
||||
/// A snapshot containing fake Remote Settings records and attachments for
|
||||
/// the store to ingest. We use snapshots to test the store's behavior in a
|
||||
/// data-driven way.
|
||||
struct Snapshot {
|
||||
records: Vec<RemoteSettingsRecord>,
|
||||
attachments: HashMap<&'static str, Vec<u8>>,
|
||||
}
|
||||
|
||||
impl Snapshot {
|
||||
/// Creates a snapshot from a JSON value that represents a collection of
|
||||
/// Relevancy Remote Settings records.
|
||||
///
|
||||
/// You can use the [`serde_json::json!`] macro to construct the JSON
|
||||
/// value, then pass it to this function. It's easier to use the
|
||||
/// `Snapshot::with_records(json!(...))` idiom than to construct the
|
||||
/// records by hand.
|
||||
fn with_records(value: serde_json::Value) -> anyhow::Result<Self> {
|
||||
Ok(Self {
|
||||
records: serde_json::from_value(value)
|
||||
.context("Couldn't create snapshot with Remote Settings records")?,
|
||||
attachments: HashMap::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Adds a data attachment to the snapshot.
|
||||
fn with_data(
|
||||
mut self,
|
||||
location: &'static str,
|
||||
value: serde_json::Value,
|
||||
) -> anyhow::Result<Self> {
|
||||
self.attachments.insert(
|
||||
location,
|
||||
serde_json::to_vec(&value).context("Couldn't add data attachment to snapshot")?,
|
||||
);
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
/// A fake Remote Settings client that returns records and attachments from
|
||||
/// a snapshot.
|
||||
struct SnapshotSettingsClient {
|
||||
/// The current snapshot. You can modify it using
|
||||
/// [`RefCell::borrow_mut()`] to simulate remote updates in tests.
|
||||
snapshot: RefCell<Snapshot>,
|
||||
}
|
||||
|
||||
impl SnapshotSettingsClient {
|
||||
/// Creates a client with an initial snapshot.
|
||||
fn with_snapshot(snapshot: Snapshot) -> Self {
|
||||
Self {
|
||||
snapshot: RefCell::new(snapshot),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RelevancyRemoteSettingsClient for SnapshotSettingsClient {
|
||||
fn get_records(&self) -> Result<RemoteSettingsResponse> {
|
||||
let records = self.snapshot.borrow().records.clone();
|
||||
let last_modified = records
|
||||
.iter()
|
||||
.map(|record: &RemoteSettingsRecord| record.last_modified)
|
||||
.max()
|
||||
.unwrap_or(0);
|
||||
Ok(RemoteSettingsResponse {
|
||||
records,
|
||||
last_modified,
|
||||
})
|
||||
}
|
||||
|
||||
fn get_attachment(&self, location: &str) -> Result<Vec<u8>> {
|
||||
Ok(self
|
||||
.snapshot
|
||||
.borrow()
|
||||
.attachments
|
||||
.get(location)
|
||||
.unwrap_or_else(|| unreachable!("Unexpected request for attachment `{}`", location))
|
||||
.clone())
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_interest_vectors() {
|
||||
let db = RelevancyDb::new_for_test();
|
||||
db.read_write(|dao| {
|
||||
// Test that the interest data matches the values we started from in
|
||||
// `bin/generate-test-data.rs`
|
||||
|
||||
dao.add_url_interest(hash_url("https://espn.com").unwrap(), Interest::Sports)?;
|
||||
dao.add_url_interest(hash_url("https://dogs.com").unwrap(), Interest::Animals)?;
|
||||
dao.add_url_interest(hash_url("https://cars.com").unwrap(), Interest::Autos)?;
|
||||
dao.add_url_interest(
|
||||
hash_url("https://www.vouge.com").unwrap(),
|
||||
Interest::Fashion,
|
||||
)?;
|
||||
dao.add_url_interest(hash_url("https://slashdot.org").unwrap(), Interest::Tech)?;
|
||||
dao.add_url_interest(hash_url("https://www.nascar.com").unwrap(), Interest::Autos)?;
|
||||
dao.add_url_interest(
|
||||
hash_url("https://www.nascar.com").unwrap(),
|
||||
Interest::Sports,
|
||||
)?;
|
||||
dao.add_url_interest(
|
||||
hash_url("https://unknown.url").unwrap(),
|
||||
Interest::Inconclusive,
|
||||
)?;
|
||||
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://espn.com/").unwrap(),
|
||||
InterestVector {
|
||||
sports: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://dogs.com/").unwrap(),
|
||||
InterestVector {
|
||||
animals: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://cars.com/").unwrap(),
|
||||
InterestVector {
|
||||
autos: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://www.vouge.com/")
|
||||
.unwrap(),
|
||||
InterestVector {
|
||||
fashion: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://slashdot.org/")
|
||||
.unwrap(),
|
||||
InterestVector {
|
||||
tech: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://www.nascar.com/")
|
||||
.unwrap(),
|
||||
InterestVector {
|
||||
autos: 1,
|
||||
sports: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://unknown.url/").unwrap(),
|
||||
InterestVector {
|
||||
inconclusive: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
Ok(())
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_variations_on_the_url() {
|
||||
let db = RelevancyDb::new_for_test();
|
||||
db.read_write(|dao| {
|
||||
dao.add_url_interest(hash_url("https://espn.com").unwrap(), Interest::Sports)?;
|
||||
dao.add_url_interest(hash_url("https://nascar.com").unwrap(), Interest::Autos)?;
|
||||
dao.add_url_interest(hash_url("https://nascar.com").unwrap(), Interest::Sports)?;
|
||||
|
||||
// Different paths/queries should work
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://espn.com/foo/bar/?baz")
|
||||
.unwrap(),
|
||||
InterestVector {
|
||||
sports: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
// Different schemes should too
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("http://espn.com/").unwrap(),
|
||||
InterestVector {
|
||||
sports: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
// But changes to the domain shouldn't
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("http://espn2.com/").unwrap(),
|
||||
InterestVector::default()
|
||||
);
|
||||
// However, extra components past the 2nd one in the domain are ignored
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://www.nascar.com/")
|
||||
.unwrap(),
|
||||
InterestVector {
|
||||
autos: 1,
|
||||
sports: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
Ok(())
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_records() -> anyhow::Result<()> {
|
||||
let snapshot = Snapshot::with_records(json!([{
|
||||
"id": "animals-0001",
|
||||
"last_modified": 15,
|
||||
"type": "category_to_domains",
|
||||
"attachment": {
|
||||
"filename": "data-1.json",
|
||||
"mimetype": "application/json",
|
||||
"location": "data-1.json",
|
||||
"hash": "",
|
||||
"size": 0
|
||||
},
|
||||
"record_custom_details": {
|
||||
"category_to_domains": {
|
||||
"category": "animals",
|
||||
"category_code": 1,
|
||||
"version": 1
|
||||
}
|
||||
}
|
||||
}]))?
|
||||
.with_data(
|
||||
"data-1.json",
|
||||
json!([
|
||||
{"domain": "J2jtyjQtYQ/+/p//xhz43Q=="},
|
||||
{"domain": "Zd4awCwGZLkat59nIWje3g=="}]),
|
||||
)?;
|
||||
let rs_client = SnapshotSettingsClient::with_snapshot(snapshot);
|
||||
assert_eq!(
|
||||
fetch_interest_data_inner(rs_client).unwrap(),
|
||||
vec![
|
||||
(Interest::Animals, hash_url("https://dogs.com").unwrap()),
|
||||
(Interest::Animals, hash_url("https://cats.com").unwrap())
|
||||
]
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_records_with_bad_domain_strings() -> anyhow::Result<()> {
|
||||
let snapshot = Snapshot::with_records(json!([{
|
||||
"id": "animals-0001",
|
||||
"last_modified": 15,
|
||||
"type": "category_to_domains",
|
||||
"attachment": {
|
||||
"filename": "data-1.json",
|
||||
"mimetype": "application/json",
|
||||
"location": "data-1.json",
|
||||
"hash": "",
|
||||
"size": 0
|
||||
},
|
||||
"record_custom_details": {
|
||||
"category_to_domains": {
|
||||
"category": "animals",
|
||||
"category_code": 1,
|
||||
"version": 1
|
||||
}
|
||||
}
|
||||
}]))?
|
||||
.with_data(
|
||||
"data-1.json",
|
||||
json!([
|
||||
{"domain": "badString"},
|
||||
{"domain": "notBase64"}]),
|
||||
)?;
|
||||
let rs_client = SnapshotSettingsClient::with_snapshot(snapshot);
|
||||
fetch_interest_data_inner(rs_client).expect_err("Invalid base64 error");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
133
third_party/rust/relevancy/src/interest.rs
vendored
133
third_party/rust/relevancy/src/interest.rs
vendored
|
|
@ -2,33 +2,37 @@
|
|||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
use crate::Error;
|
||||
|
||||
/// List of possible interests for a domain. Domains can be associated with one or multiple
|
||||
/// interests. `Inconclusive` is used for domains in the user's top sites that we can't classify
|
||||
/// because there's no corresponding entry in the interest database.
|
||||
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||
#[repr(u32)]
|
||||
pub enum Interest {
|
||||
Inconclusive,
|
||||
Animals,
|
||||
Arts,
|
||||
Autos,
|
||||
Business,
|
||||
Career,
|
||||
Education,
|
||||
Fashion,
|
||||
Finance,
|
||||
Food,
|
||||
Government,
|
||||
// Note: if you change these codes, make sure to update the `TryFrom<u32>` implementation and
|
||||
// the `test_interest_code_conversion` test.
|
||||
Inconclusive = 0,
|
||||
Animals = 1,
|
||||
Arts = 2,
|
||||
Autos = 3,
|
||||
Business = 4,
|
||||
Career = 5,
|
||||
Education = 6,
|
||||
Fashion = 7,
|
||||
Finance = 8,
|
||||
Food = 9,
|
||||
Government = 10,
|
||||
//Disable this per policy consultation
|
||||
// Health,
|
||||
Hobbies,
|
||||
Home,
|
||||
News,
|
||||
RealEstate,
|
||||
Society,
|
||||
Sports,
|
||||
Tech,
|
||||
Travel,
|
||||
// Health = 11,
|
||||
Hobbies = 12,
|
||||
Home = 13,
|
||||
News = 14,
|
||||
RealEstate = 15,
|
||||
Society = 16,
|
||||
Sports = 17,
|
||||
Tech = 18,
|
||||
Travel = 19,
|
||||
}
|
||||
|
||||
impl From<Interest> for u32 {
|
||||
|
|
@ -43,14 +47,35 @@ impl From<Interest> for usize {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<u32> for Interest {
|
||||
fn from(code: u32) -> Self {
|
||||
if code as usize > Self::COUNT {
|
||||
panic!("Invalid interest code: {code}")
|
||||
impl TryFrom<u32> for Interest {
|
||||
// On error, return the invalid code back
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(code: u32) -> Result<Self, Self::Error> {
|
||||
match code {
|
||||
0 => Ok(Self::Inconclusive),
|
||||
1 => Ok(Self::Animals),
|
||||
2 => Ok(Self::Arts),
|
||||
3 => Ok(Self::Autos),
|
||||
4 => Ok(Self::Business),
|
||||
5 => Ok(Self::Career),
|
||||
6 => Ok(Self::Education),
|
||||
7 => Ok(Self::Fashion),
|
||||
8 => Ok(Self::Finance),
|
||||
9 => Ok(Self::Food),
|
||||
10 => Ok(Self::Government),
|
||||
//Disable this per policy consultation
|
||||
// 11 => Ok(Self::Health),
|
||||
12 => Ok(Self::Hobbies),
|
||||
13 => Ok(Self::Home),
|
||||
14 => Ok(Self::News),
|
||||
15 => Ok(Self::RealEstate),
|
||||
16 => Ok(Self::Society),
|
||||
17 => Ok(Self::Sports),
|
||||
18 => Ok(Self::Tech),
|
||||
19 => Ok(Self::Travel),
|
||||
n => Err(Error::InvalidInterestCode(n)),
|
||||
}
|
||||
// Safety: This is safe since Interest has a u32 representation and we've done a bounds
|
||||
// check
|
||||
unsafe { std::mem::transmute(code) }
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -111,6 +136,34 @@ pub struct InterestVector {
|
|||
pub travel: u32,
|
||||
}
|
||||
|
||||
impl std::ops::Add for InterestVector {
|
||||
type Output = Self;
|
||||
|
||||
fn add(self, other: Self) -> Self {
|
||||
Self {
|
||||
inconclusive: self.inconclusive + other.inconclusive,
|
||||
animals: self.animals + other.animals,
|
||||
arts: self.arts + other.arts,
|
||||
autos: self.autos + other.autos,
|
||||
business: self.business + other.business,
|
||||
career: self.career + other.career,
|
||||
education: self.education + other.education,
|
||||
fashion: self.fashion + other.fashion,
|
||||
finance: self.finance + other.finance,
|
||||
food: self.food + other.food,
|
||||
government: self.government + other.government,
|
||||
hobbies: self.hobbies + other.hobbies,
|
||||
home: self.home + other.home,
|
||||
news: self.news + other.news,
|
||||
real_estate: self.real_estate + other.real_estate,
|
||||
society: self.society + other.society,
|
||||
sports: self.sports + other.sports,
|
||||
tech: self.tech + other.tech,
|
||||
travel: self.travel + other.travel,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Index<Interest> for InterestVector {
|
||||
type Output = u32;
|
||||
|
||||
|
|
@ -166,3 +219,29 @@ impl std::ops::IndexMut<Interest> for InterestVector {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_interest_code_conversion() {
|
||||
for interest in Interest::all() {
|
||||
assert_eq!(Interest::try_from(u32::from(interest)).unwrap(), interest)
|
||||
}
|
||||
// try_from() for out of bounds codes should return an error
|
||||
assert!(matches!(
|
||||
Interest::try_from(20),
|
||||
Err(Error::InvalidInterestCode(20))
|
||||
));
|
||||
assert!(matches!(
|
||||
Interest::try_from(100),
|
||||
Err(Error::InvalidInterestCode(100))
|
||||
));
|
||||
// Health is currently disabled, so its code should return an error for now
|
||||
assert!(matches!(
|
||||
Interest::try_from(11),
|
||||
Err(Error::InvalidInterestCode(11))
|
||||
));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
72
third_party/rust/relevancy/src/lib.rs
vendored
72
third_party/rust/relevancy/src/lib.rs
vendored
|
|
@ -11,8 +11,9 @@
|
|||
|
||||
mod db;
|
||||
mod error;
|
||||
mod ingest;
|
||||
mod interest;
|
||||
mod populate_interests;
|
||||
mod rs;
|
||||
mod schema;
|
||||
pub mod url_hash;
|
||||
|
||||
|
|
@ -28,11 +29,10 @@ pub struct RelevancyStore {
|
|||
|
||||
/// Top-level API for the Relevancy component
|
||||
impl RelevancyStore {
|
||||
#[handle_error(Error)]
|
||||
pub fn new(db_path: String) -> ApiResult<Self> {
|
||||
Ok(Self {
|
||||
db: RelevancyDb::new(db_path)?,
|
||||
})
|
||||
pub fn new(db_path: String) -> Self {
|
||||
Self {
|
||||
db: RelevancyDb::new(db_path),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn close(&self) {
|
||||
|
|
@ -55,9 +55,21 @@ impl RelevancyStore {
|
|||
///
|
||||
/// This method may execute for a long time and should only be called from a worker thread.
|
||||
#[handle_error(Error)]
|
||||
pub fn ingest(&self, _top_urls_by_frecency: Vec<String>) -> ApiResult<()> {
|
||||
populate_interests::ensure_interest_data_populated(&self.db)?;
|
||||
todo!()
|
||||
pub fn ingest(&self, top_urls_by_frecency: Vec<String>) -> ApiResult<InterestVector> {
|
||||
ingest::ensure_interest_data_populated(&self.db)?;
|
||||
self.classify(top_urls_by_frecency)
|
||||
}
|
||||
|
||||
pub fn classify(&self, top_urls_by_frecency: Vec<String>) -> Result<InterestVector> {
|
||||
// For experimentation purposes we are going to return an interest vector.
|
||||
// Eventually we would want to store this data in the DB and incrementally update it.
|
||||
let mut interest_vector = InterestVector::default();
|
||||
for url in top_urls_by_frecency {
|
||||
let interest_count = self.db.read(|dao| dao.get_url_interest_vector(&url))?;
|
||||
interest_vector = interest_vector + interest_count;
|
||||
}
|
||||
|
||||
Ok(interest_vector)
|
||||
}
|
||||
|
||||
/// Calculate metrics for the validation phase
|
||||
|
|
@ -87,3 +99,45 @@ pub struct InterestMetrics {
|
|||
}
|
||||
|
||||
uniffi::include_scaffolding!("relevancy");
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::url_hash::hash_url;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_ingest() {
|
||||
let top_urls = vec![
|
||||
"https://food.com/".to_string(),
|
||||
"https://hello.com".to_string(),
|
||||
"https://pasta.com".to_string(),
|
||||
"https://dog.com".to_string(),
|
||||
];
|
||||
let relevancy_store =
|
||||
RelevancyStore::new("file:test_store_data?mode=memory&cache=shared".to_owned());
|
||||
relevancy_store
|
||||
.db
|
||||
.read_write(|dao| {
|
||||
dao.add_url_interest(hash_url("https://food.com").unwrap(), Interest::Food)?;
|
||||
dao.add_url_interest(
|
||||
hash_url("https://hello.com").unwrap(),
|
||||
Interest::Inconclusive,
|
||||
)?;
|
||||
dao.add_url_interest(hash_url("https://pasta.com").unwrap(), Interest::Food)?;
|
||||
dao.add_url_interest(hash_url("https://dog.com").unwrap(), Interest::Animals)?;
|
||||
Ok(())
|
||||
})
|
||||
.expect("Insert should succeed");
|
||||
|
||||
assert_eq!(
|
||||
relevancy_store.ingest(top_urls).unwrap(),
|
||||
InterestVector {
|
||||
inconclusive: 1,
|
||||
animals: 1,
|
||||
food: 2,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
164
third_party/rust/relevancy/src/populate_interests.rs
vendored
164
third_party/rust/relevancy/src/populate_interests.rs
vendored
|
|
@ -1,164 +0,0 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
use crate::{url_hash::UrlHash, Error, Interest, RelevancyDb, Result};
|
||||
use std::io::{Cursor, Read};
|
||||
|
||||
// Number of rows to write when inserting interest data before checking for interruption
|
||||
const WRITE_CHUNK_SIZE: usize = 100;
|
||||
|
||||
pub fn ensure_interest_data_populated(db: &RelevancyDb) -> Result<()> {
|
||||
if !db.read(|dao| dao.need_to_load_url_interests())? {
|
||||
return Ok(());
|
||||
}
|
||||
let interest_data = match fetch_interest_data() {
|
||||
Ok(data) => data,
|
||||
Err(e) => {
|
||||
log::warn!("error fetching interest data: {e}");
|
||||
return Err(Error::FetchInterestDataError);
|
||||
}
|
||||
};
|
||||
db.read_write(move |dao| {
|
||||
for chunk in interest_data.chunks(WRITE_CHUNK_SIZE) {
|
||||
for (url_hash, interest) in chunk {
|
||||
dao.add_url_interest(*url_hash, *interest)?;
|
||||
}
|
||||
dao.err_if_interrupted()?;
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
/// Fetch the interest data
|
||||
fn fetch_interest_data() -> std::io::Result<Vec<(UrlHash, Interest)>> {
|
||||
// TODO: this hack should be replaced with something that fetches from remote settings.
|
||||
// It should ideally check for interruption while fetching the data.
|
||||
let bytes = include_bytes!("../test-data");
|
||||
let mut reader = Cursor::new(&bytes);
|
||||
let mut data = vec![];
|
||||
|
||||
// Loop over all possible interests
|
||||
for interest in Interest::all() {
|
||||
// read the count
|
||||
let mut buf = [0u8; 4];
|
||||
reader.read_exact(&mut buf)?;
|
||||
let count = u32::from_le_bytes(buf);
|
||||
for _ in 0..count {
|
||||
let mut url_hash: UrlHash = [0u8; 16];
|
||||
reader.read_exact(&mut url_hash)?;
|
||||
data.push((url_hash, interest));
|
||||
}
|
||||
}
|
||||
Ok(data)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::InterestVector;
|
||||
|
||||
#[test]
|
||||
fn test_interest_vectors() {
|
||||
let db = RelevancyDb::new_for_test();
|
||||
ensure_interest_data_populated(&db).unwrap();
|
||||
db.read(|dao| {
|
||||
// Test that the interest data matches the values we started from in
|
||||
// `bin/generate-test-data.rs`
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://espn.com/").unwrap(),
|
||||
InterestVector {
|
||||
sports: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://dogs.com/").unwrap(),
|
||||
InterestVector {
|
||||
animals: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://cars.com/").unwrap(),
|
||||
InterestVector {
|
||||
autos: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://www.vouge.com/")
|
||||
.unwrap(),
|
||||
InterestVector {
|
||||
fashion: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://slashdot.org/")
|
||||
.unwrap(),
|
||||
InterestVector {
|
||||
tech: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://www.nascar.com/")
|
||||
.unwrap(),
|
||||
InterestVector {
|
||||
autos: 1,
|
||||
sports: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://unknown.url/").unwrap(),
|
||||
InterestVector::default()
|
||||
);
|
||||
Ok(())
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_variations_on_the_url() {
|
||||
let db = RelevancyDb::new_for_test();
|
||||
ensure_interest_data_populated(&db).unwrap();
|
||||
db.read(|dao| {
|
||||
// Different paths/queries should work
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://espn.com/foo/bar/?baz")
|
||||
.unwrap(),
|
||||
InterestVector {
|
||||
sports: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
// Different schemes should too
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("http://espn.com/").unwrap(),
|
||||
InterestVector {
|
||||
sports: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
// But changes to the domain shouldn't
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("http://www.espn.com/").unwrap(),
|
||||
InterestVector::default()
|
||||
);
|
||||
// However, extra components past the 3rd one in the domain are ignored
|
||||
assert_eq!(
|
||||
dao.get_url_interest_vector("https://foo.www.nascar.com/")
|
||||
.unwrap(),
|
||||
InterestVector {
|
||||
autos: 1,
|
||||
sports: 1,
|
||||
..InterestVector::default()
|
||||
}
|
||||
);
|
||||
Ok(())
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
3
third_party/rust/relevancy/src/relevancy.udl
vendored
3
third_party/rust/relevancy/src/relevancy.udl
vendored
|
|
@ -10,7 +10,6 @@ interface RelevancyStore {
|
|||
// Construct a new RelevancyStore
|
||||
//
|
||||
// This is non-blocking since databases and other resources are lazily opened.
|
||||
[Throws=RelevancyApiError]
|
||||
constructor(string dbpath);
|
||||
|
||||
// Close any open resources (for example databases)
|
||||
|
|
@ -23,7 +22,7 @@ interface RelevancyStore {
|
|||
|
||||
// Ingest the top URLs by frequency to build up the user's interest vector
|
||||
[Throws=RelevancyApiError]
|
||||
void ingest(sequence<string> top_urls);
|
||||
InterestVector ingest(sequence<string> top_urls);
|
||||
|
||||
// Calculate metrics for the user's interest vector in order to measure how strongly we're
|
||||
// identifying interests. See the `InterestMetrics` struct for details.
|
||||
|
|
|
|||
60
third_party/rust/relevancy/src/rs.rs
vendored
Normal file
60
third_party/rust/relevancy/src/rs.rs
vendored
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
*/
|
||||
|
||||
use crate::Result;
|
||||
use remote_settings::RemoteSettingsResponse;
|
||||
use serde::Deserialize;
|
||||
/// The Remote Settings collection name.
|
||||
pub(crate) const REMOTE_SETTINGS_COLLECTION: &str = "content-relevance";
|
||||
|
||||
/// A trait for a client that downloads records from Remote Settings.
|
||||
///
|
||||
/// This trait lets tests use a mock client.
|
||||
pub(crate) trait RelevancyRemoteSettingsClient {
|
||||
/// Fetches records from the Relevancy Remote Settings collection.
|
||||
fn get_records(&self) -> Result<RemoteSettingsResponse>;
|
||||
|
||||
/// Fetches a record's attachment from the Relevancy Remote Settings
|
||||
/// collection.
|
||||
fn get_attachment(&self, location: &str) -> Result<Vec<u8>>;
|
||||
}
|
||||
|
||||
impl RelevancyRemoteSettingsClient for remote_settings::Client {
|
||||
fn get_records(&self) -> Result<RemoteSettingsResponse> {
|
||||
Ok(remote_settings::Client::get_records(self)?)
|
||||
}
|
||||
|
||||
fn get_attachment(&self, location: &str) -> Result<Vec<u8>> {
|
||||
Ok(remote_settings::Client::get_attachment(self, location)?)
|
||||
}
|
||||
}
|
||||
|
||||
/// A record in the Relevancy Remote Settings collection.
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
pub struct RelevancyRecord {
|
||||
#[serde(rename = "type")]
|
||||
pub record_type: String,
|
||||
pub record_custom_details: RecordCustomDetails,
|
||||
}
|
||||
|
||||
// Custom details related to category of the record.
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
pub struct RecordCustomDetails {
|
||||
pub category_to_domains: CategoryToDomains,
|
||||
}
|
||||
|
||||
/// Category information related to the record.
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
pub struct CategoryToDomains {
|
||||
pub version: i32,
|
||||
pub category: String,
|
||||
pub category_code: i32,
|
||||
}
|
||||
|
||||
/// A downloaded Remote Settings attachment that contains domain data.
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
pub struct RelevancyAttachmentData {
|
||||
pub domain: String,
|
||||
}
|
||||
15
third_party/rust/relevancy/src/url_hash.rs
vendored
15
third_party/rust/relevancy/src/url_hash.rs
vendored
|
|
@ -8,11 +8,10 @@ use url::{Host, Url};
|
|||
pub type UrlHash = [u8; 16];
|
||||
|
||||
/// Given a URL, extract the part of it that we want to use to identify it.
|
||||
///
|
||||
/// We currently use the final 3 components of the URL domain.
|
||||
///
|
||||
/// TODO: decide if this should be 3 or 3 components.
|
||||
pub fn url_hash_source(url: &str) -> Option<String> {
|
||||
// We currently use the final 2 components of the URL domain.
|
||||
const URL_COMPONENTS_TO_USE: usize = 2;
|
||||
|
||||
let url = Url::parse(url).ok()?;
|
||||
let domain = match url.host() {
|
||||
Some(Host::Domain(d)) => d,
|
||||
|
|
@ -20,7 +19,7 @@ pub fn url_hash_source(url: &str) -> Option<String> {
|
|||
};
|
||||
// This will store indexes of `.` chars as we search backwards.
|
||||
let mut pos = domain.len();
|
||||
for _ in 0..3 {
|
||||
for _ in 0..URL_COMPONENTS_TO_USE {
|
||||
match domain[0..pos].rfind('.') {
|
||||
Some(p) => pos = p,
|
||||
// The domain has fewer than `URL_COMPONENTS_TO_USE` dots, return it all
|
||||
|
|
@ -47,12 +46,12 @@ mod test {
|
|||
fn test_url_hash_source() {
|
||||
let table = [
|
||||
("http://example.com/some-path", Some("example.com")),
|
||||
("http://foo.example.com/some-path", Some("foo.example.com")),
|
||||
("http://foo.example.com/some-path", Some("example.com")),
|
||||
(
|
||||
"http://foo.bar.baz.example.com/some-path",
|
||||
Some("baz.example.com"),
|
||||
Some("example.com"),
|
||||
),
|
||||
("http://foo.com.uk/some-path", Some("foo.com.uk")),
|
||||
("http://foo.com.uk/some-path", Some("com.uk")),
|
||||
("http://amazon.com/some-path", Some("amazon.com")),
|
||||
("http://192.168.0.1/some-path", None),
|
||||
];
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
{"files":{"Cargo.toml":"05e4d7f7b3649a3e3fa441c4af53a633d18f20bb04fd761ed33fc9d461fd0dee","README.md":"fb72d0028586cab1421b853ef529d7ce78ad7316818b7733a4f3488b0fba67f7","benches/benchmark_all.rs":"c2343c9197b6d9ccb0798d7701b1b0d2569d494dd31a975d21d7ec6f26e32879","build.rs":"78780c5cccfe22c3ff4198624b9e188559c437c3e6fa1c8bb66548eee6aa66bf","src/benchmarks/README.md":"ee6d50df2c31cfd80a5bc047011b518dcf57f1ef928a811bb770f1a09f41b3de","src/benchmarks/client.rs":"4b2125031d740ca1ab468e76bbea777ac0bc4cc221b03b7bc2da773bed61dac5","src/benchmarks/ingest.rs":"1ffdc403fb945ea0b58353df9773ba45ab0e9082d61dd5330ad49fad8cbb5d9f","src/benchmarks/mod.rs":"fe1898ba4d783213525da10d92858ee84cebfd22749bad7aeb461d338fe5504a","src/bin/debug_ingestion_sizes.rs":"ce6e810be7b3fc19e826d75b622b82cfab5a1a99397a6d0833c2c4eebff2d364","src/config.rs":"206ae9dc768c755649cb0c88a7b1fc3c926c715441784f61e9dc06a8a02fc568","src/db.rs":"734f5fd9f36f03c07a508a9a353872b81107f5fe09f27294ba27d7e1249e3988","src/error.rs":"f563210a6c050d98ec85e0f6d9401e7373bfb816e865e8edabbabb23d848ba13","src/keyword.rs":"988d0ab021c0df19cfd3c519df7d37f606bf984cd14d0efca4e5a7aff88344dd","src/lib.rs":"18f988eb49626c6e186c8bc65a51b4a40d796f36d3de8905506f76c6e5e876cd","src/pocket.rs":"1316668840ec9b4ea886223921dc9d3b5a1731d1a5206c0b1089f2a6c45c1b7b","src/provider.rs":"fe76f19a223f5cac056c7d48525087ca2c26bf0629b0e11b1f8dc98d165c8bb2","src/rs.rs":"e3eabde58c859ebe1154bf8da56ca134ace135934e3f280acc8186b4204399b3","src/schema.rs":"88ff3ae6b652fa5a5cff4dc504d11a7fc33f1b2ee9716b970f646d9f9ca90ab7","src/store.rs":"5873438bfc2d2a3e112935bb196bcd1f9b46351d1b341113115f45f7117fc3bf","src/suggest.udl":"b49043c5ec0210aeccf92eadbc1acdce697fc588a2500a281e083b3d8c42ff73","src/suggestion.rs":"f31227779d13d1b03a622e08a417ceba4afb161885a01c2bc87a6a652b5e8be5","src/yelp.rs":"9c0dc02a994cc05df524aa4ef337d10f575d1891259193b6419fed6fe279cb54","uniffi.toml":"f26317442ddb5b3281245bef6e60ffcb78bb95d29fe4a351a56dbb88d4ec8aab"},"package":null}
|
||||
{"files":{"Cargo.toml":"05e4d7f7b3649a3e3fa441c4af53a633d18f20bb04fd761ed33fc9d461fd0dee","README.md":"fb72d0028586cab1421b853ef529d7ce78ad7316818b7733a4f3488b0fba67f7","benches/benchmark_all.rs":"c2343c9197b6d9ccb0798d7701b1b0d2569d494dd31a975d21d7ec6f26e32879","build.rs":"78780c5cccfe22c3ff4198624b9e188559c437c3e6fa1c8bb66548eee6aa66bf","src/benchmarks/README.md":"ee6d50df2c31cfd80a5bc047011b518dcf57f1ef928a811bb770f1a09f41b3de","src/benchmarks/client.rs":"4b2125031d740ca1ab468e76bbea777ac0bc4cc221b03b7bc2da773bed61dac5","src/benchmarks/ingest.rs":"1ffdc403fb945ea0b58353df9773ba45ab0e9082d61dd5330ad49fad8cbb5d9f","src/benchmarks/mod.rs":"fe1898ba4d783213525da10d92858ee84cebfd22749bad7aeb461d338fe5504a","src/bin/debug_ingestion_sizes.rs":"ce6e810be7b3fc19e826d75b622b82cfab5a1a99397a6d0833c2c4eebff2d364","src/config.rs":"206ae9dc768c755649cb0c88a7b1fc3c926c715441784f61e9dc06a8a02fc568","src/db.rs":"a4e18b9f45e0473ea64b5ecdf6d1d67e0519f9629d495c157b0bd1b47c3e2f4f","src/error.rs":"f563210a6c050d98ec85e0f6d9401e7373bfb816e865e8edabbabb23d848ba13","src/keyword.rs":"988d0ab021c0df19cfd3c519df7d37f606bf984cd14d0efca4e5a7aff88344dd","src/lib.rs":"18f988eb49626c6e186c8bc65a51b4a40d796f36d3de8905506f76c6e5e876cd","src/pocket.rs":"1316668840ec9b4ea886223921dc9d3b5a1731d1a5206c0b1089f2a6c45c1b7b","src/provider.rs":"fe76f19a223f5cac056c7d48525087ca2c26bf0629b0e11b1f8dc98d165c8bb2","src/rs.rs":"e3eabde58c859ebe1154bf8da56ca134ace135934e3f280acc8186b4204399b3","src/schema.rs":"88ff3ae6b652fa5a5cff4dc504d11a7fc33f1b2ee9716b970f646d9f9ca90ab7","src/store.rs":"aad193774eecec739a7debd1c9e4fd46df384e7a524203e5e5f0354b93f73c1c","src/suggest.udl":"bfa653aa88c954860a9728a597daad8f4a7db8c81bc156725bf801f7cddf8459","src/suggestion.rs":"f31227779d13d1b03a622e08a417ceba4afb161885a01c2bc87a6a652b5e8be5","src/yelp.rs":"9c0dc02a994cc05df524aa4ef337d10f575d1891259193b6419fed6fe279cb54","uniffi.toml":"f26317442ddb5b3281245bef6e60ffcb78bb95d29fe4a351a56dbb88d4ec8aab"},"package":null}
|
||||
6
third_party/rust/suggest/src/db.rs
vendored
6
third_party/rust/suggest/src/db.rs
vendored
|
|
@ -188,6 +188,12 @@ impl<'a> SuggestDao<'a> {
|
|||
//
|
||||
// These methods implement CRUD operations
|
||||
|
||||
pub fn suggestions_table_empty(&self) -> Result<bool> {
|
||||
Ok(self
|
||||
.conn
|
||||
.query_one::<bool>("SELECT NOT EXISTS (SELECT 1 FROM suggestions)")?)
|
||||
}
|
||||
|
||||
/// Fetches suggestions that match the given query from the database.
|
||||
pub fn fetch_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
|
||||
let unique_providers = query.providers.iter().collect::<HashSet<_>>();
|
||||
|
|
|
|||
161
third_party/rust/suggest/src/store.rs
vendored
161
third_party/rust/suggest/src/store.rs
vendored
|
|
@ -275,6 +275,8 @@ pub struct SuggestIngestionConstraints {
|
|||
/// soft limit, and the store might ingest more than requested.
|
||||
pub max_suggestions: Option<u64>,
|
||||
pub providers: Option<Vec<SuggestionProvider>>,
|
||||
/// Only run ingestion if the table `suggestions` is empty
|
||||
pub empty_only: bool,
|
||||
}
|
||||
|
||||
/// The implementation of the store. This is generic over the Remote Settings
|
||||
|
|
@ -357,6 +359,10 @@ where
|
|||
pub fn ingest(&self, constraints: SuggestIngestionConstraints) -> Result<()> {
|
||||
let writer = &self.dbs()?.writer;
|
||||
|
||||
if constraints.empty_only && !writer.read(|dao| dao.suggestions_table_empty())? {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if let Some(unparsable_records) =
|
||||
writer.read(|dao| dao.get_meta::<UnparsableRecords>(UNPARSABLE_RECORDS_META_KEY))?
|
||||
{
|
||||
|
|
@ -888,6 +894,12 @@ mod tests {
|
|||
|
||||
let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
|
||||
|
||||
// suggestions_table_empty returns true before the ingestion is complete
|
||||
assert!(store
|
||||
.dbs()?
|
||||
.reader
|
||||
.read(|dao| dao.suggestions_table_empty())?);
|
||||
|
||||
store.ingest(SuggestIngestionConstraints::default())?;
|
||||
|
||||
store.dbs()?.reader.read(|dao| {
|
||||
|
|
@ -927,6 +939,153 @@ mod tests {
|
|||
Ok(())
|
||||
})?;
|
||||
|
||||
// suggestions_table_empty returns false after the ingestion is complete
|
||||
assert!(!store
|
||||
.dbs()?
|
||||
.reader
|
||||
.read(|dao| dao.suggestions_table_empty())?);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Tests that an `empty_only` ingestion runs only when the `suggestions` table is empty.
|
||||
#[test]
|
||||
fn ingest_empty_only() -> anyhow::Result<()> {
|
||||
before_each();
|
||||
|
||||
// This ingestion should run, since the DB is empty
|
||||
let snapshot = Snapshot::with_records(json!([{
|
||||
"id": "1234",
|
||||
"type": "data",
|
||||
"last_modified": 15,
|
||||
"attachment": {
|
||||
"filename": "data-1.json",
|
||||
"mimetype": "application/json",
|
||||
"location": "data-1.json",
|
||||
"hash": "",
|
||||
"size": 0,
|
||||
},
|
||||
}]))?
|
||||
.with_data(
|
||||
"data-1.json",
|
||||
json!([{
|
||||
"id": 0,
|
||||
"advertiser": "Los Pollos Hermanos",
|
||||
"iab_category": "8 - Food & Drink",
|
||||
"keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"],
|
||||
"title": "Los Pollos Hermanos - Albuquerque",
|
||||
"url": "https://www.lph-nm.biz",
|
||||
"icon": "5678",
|
||||
"impression_url": "https://example.com/impression_url",
|
||||
"click_url": "https://example.com/click_url",
|
||||
"score": 0.3
|
||||
}]),
|
||||
)?;
|
||||
let mut store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
|
||||
store.ingest(SuggestIngestionConstraints {
|
||||
empty_only: true,
|
||||
..SuggestIngestionConstraints::default()
|
||||
})?;
|
||||
|
||||
store.dbs()?.reader.read(|dao| {
|
||||
expect![[r#"
|
||||
[
|
||||
Amp {
|
||||
title: "Los Pollos Hermanos - Albuquerque",
|
||||
url: "https://www.lph-nm.biz",
|
||||
raw_url: "https://www.lph-nm.biz",
|
||||
icon: None,
|
||||
icon_mimetype: None,
|
||||
full_keyword: "los",
|
||||
block_id: 0,
|
||||
advertiser: "Los Pollos Hermanos",
|
||||
iab_category: "8 - Food & Drink",
|
||||
impression_url: "https://example.com/impression_url",
|
||||
click_url: "https://example.com/click_url",
|
||||
raw_click_url: "https://example.com/click_url",
|
||||
score: 0.3,
|
||||
},
|
||||
]
|
||||
"#]]
|
||||
.assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
|
||||
keyword: "lo".into(),
|
||||
providers: vec![SuggestionProvider::Amp],
|
||||
limit: None,
|
||||
})?);
|
||||
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
// ingestion should not run with SuggestIngestionConstraints::empty_only = true, since the DB
|
||||
// is no longer empty
|
||||
store.settings_client = SnapshotSettingsClient::with_snapshot(Snapshot::with_records(json!([{
|
||||
"id": "1234",
|
||||
"type": "data",
|
||||
"last_modified": 15,
|
||||
"attachment": {
|
||||
"filename": "data-1.json",
|
||||
"mimetype": "application/json",
|
||||
"location": "data-1.json",
|
||||
"hash": "",
|
||||
"size": 0,
|
||||
},
|
||||
}, {
|
||||
"id": "12345",
|
||||
"type": "data",
|
||||
"last_modified": 15,
|
||||
"attachment": {
|
||||
"filename": "data-2.json",
|
||||
"mimetype": "application/json",
|
||||
"location": "data-2.json",
|
||||
"hash": "",
|
||||
"size": 0,
|
||||
},
|
||||
}]))?
|
||||
.with_data(
|
||||
"data-1.json",
|
||||
json!([{
|
||||
"id": 0,
|
||||
"advertiser": "Los Pollos Hermanos",
|
||||
"iab_category": "8 - Food & Drink",
|
||||
"keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"],
|
||||
"title": "Los Pollos Hermanos - Albuquerque",
|
||||
"url": "https://www.lph-nm.biz",
|
||||
"icon": "5678",
|
||||
"impression_url": "https://example.com/impression_url",
|
||||
"click_url": "https://example.com/click_url",
|
||||
"score": 0.3
|
||||
}])
|
||||
)?
|
||||
.with_data("data-2.json", json!([{
|
||||
"id": 1,
|
||||
"advertiser": "Good Place Eats",
|
||||
"iab_category": "8 - Food & Drink",
|
||||
"keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"],
|
||||
"title": "Lasagna Come Out Tomorrow",
|
||||
"url": "https://www.lasagna.restaurant",
|
||||
"icon": "2",
|
||||
"impression_url": "https://example.com/impression_url",
|
||||
"click_url": "https://example.com/click_url"
|
||||
}]),
|
||||
)?);
|
||||
store.ingest(SuggestIngestionConstraints {
|
||||
empty_only: true,
|
||||
..SuggestIngestionConstraints::default()
|
||||
})?;
|
||||
|
||||
store.dbs()?.reader.read(|dao| {
|
||||
expect![[r#"
|
||||
[]
|
||||
"#]]
|
||||
.assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
|
||||
keyword: "la".into(),
|
||||
providers: vec![SuggestionProvider::Amp],
|
||||
limit: None,
|
||||
})?);
|
||||
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -2212,6 +2371,7 @@ mod tests {
|
|||
store.ingest(SuggestIngestionConstraints {
|
||||
max_suggestions: Some(max_suggestions),
|
||||
providers: Some(vec![SuggestionProvider::Amp]),
|
||||
..SuggestIngestionConstraints::default()
|
||||
})?;
|
||||
let actual_limit = store
|
||||
.settings_client
|
||||
|
|
@ -5201,6 +5361,7 @@ mod tests {
|
|||
let constraints = SuggestIngestionConstraints {
|
||||
max_suggestions: Some(100),
|
||||
providers: Some(vec![SuggestionProvider::Amp, SuggestionProvider::Pocket]),
|
||||
..SuggestIngestionConstraints::default()
|
||||
};
|
||||
store.ingest(constraints)?;
|
||||
|
||||
|
|
|
|||
8
third_party/rust/suggest/src/suggest.udl
vendored
8
third_party/rust/suggest/src/suggest.udl
vendored
|
|
@ -106,6 +106,14 @@ dictionary SuggestionQuery {
|
|||
dictionary SuggestIngestionConstraints {
|
||||
u64? max_suggestions = null;
|
||||
sequence<SuggestionProvider>? providers = null;
|
||||
// Only ingest if the table `suggestions` is empty.
|
||||
//
|
||||
// This is intended to handle periodic updates. Consumers can schedule an ingest with
|
||||
// `empty_only=true` on startup and a regular ingest with `empty_only=false` to run on a long periodic schedule (maybe
|
||||
// once a day). This allows ingestion to normally be run at a slow, periodic rate. However, if
|
||||
// there is a schema upgrade that causes the database to be thrown away, then the
|
||||
// `empty_only=true` ingestion that runs on startup will repopulate it.
|
||||
boolean empty_only = false;
|
||||
};
|
||||
|
||||
dictionary SuggestGlobalConfig {
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ ChromeUtils.defineESModuleGetters(lazy, {
|
|||
"resource://gre/modules/contentrelevancy/private/InputUtils.sys.mjs",
|
||||
NimbusFeatures: "resource://nimbus/ExperimentAPI.sys.mjs",
|
||||
RelevancyStore: "resource://gre/modules/RustRelevancy.sys.mjs",
|
||||
InterestVector: "resource://gre/modules/RustRelevancy.sys.mjs",
|
||||
});
|
||||
|
||||
XPCOMUtils.defineLazyServiceGetter(
|
||||
|
|
@ -40,6 +41,7 @@ const NIMBUS_VARIABLE_ENABLED = "enabled";
|
|||
const NIMBUS_VARIABLE_MAX_INPUT_URLS = "maxInputUrls";
|
||||
const NIMBUS_VARIABLE_MIN_INPUT_URLS = "minInputUrls";
|
||||
const NIMBUS_VARIABLE_TIMER_INTERVAL = "timerInterval";
|
||||
const NIMBUS_VARIABLE_INGEST_ENABLED = "ingestEnabled";
|
||||
|
||||
ChromeUtils.defineLazyGetter(lazy, "log", () => {
|
||||
return console.createInstance({
|
||||
|
|
@ -243,18 +245,21 @@ class RelevancyManager {
|
|||
lazy.log.info("Starting interest classification");
|
||||
timerId = Glean.relevancyClassify.duration.start();
|
||||
|
||||
await this.#doClassificationHelper(urls);
|
||||
const interestVector = await this.#doClassificationHelper(urls);
|
||||
const sortedVector = Object.entries(interestVector).sort(
|
||||
([, a], [, b]) => b - a // descending
|
||||
);
|
||||
lazy.log.info(`Classification results: ${JSON.stringify(sortedVector)}`);
|
||||
|
||||
Glean.relevancyClassify.duration.stopAndAccumulate(timerId);
|
||||
Glean.relevancyClassify.succeed.record({
|
||||
input_size: urls.length,
|
||||
// TODO(nanj): Fill out the actual counters once the classification is enabled.
|
||||
input_classified_size: 0,
|
||||
input_inconclusive_size: 0,
|
||||
output_interest_size: 0,
|
||||
interest_top_1_hits: 0,
|
||||
interest_top_2_hits: 0,
|
||||
interest_top_3_hits: 0,
|
||||
input_classified_size: sortedVector.reduce((acc, [, v]) => acc + v, 0),
|
||||
input_inconclusive_size: interestVector.inconclusive,
|
||||
output_interest_size: sortedVector.filter(([, v]) => v != 0).length,
|
||||
interest_top_1_hits: sortedVector[0][1],
|
||||
interest_top_2_hits: sortedVector[1][1],
|
||||
interest_top_3_hits: sortedVector[2][1],
|
||||
});
|
||||
} catch (error) {
|
||||
let reason;
|
||||
|
|
@ -290,28 +295,48 @@ class RelevancyManager {
|
|||
*
|
||||
* @param {Array} urls
|
||||
* An array of URLs.
|
||||
* @returns {InterestVector}
|
||||
* An interest vector.
|
||||
* @throws {StoreNotAvailableError}
|
||||
* Thrown when the store became unavailable (i.e. set to null elsewhere).
|
||||
* @throws {RelevancyAPIError}
|
||||
* Thrown for other API errors on the store.
|
||||
*/
|
||||
async #doClassificationHelper(urls) {
|
||||
// The following logs are unnecessary, only used to suppress the linting error.
|
||||
// TODO(nanj): delete me once the following TODO is done.
|
||||
if (!this.#store) {
|
||||
lazy.log.error("#store became null, aborting interest classification");
|
||||
}
|
||||
lazy.log.info("Classification input: " + urls);
|
||||
|
||||
// TODO(nanj): uncomment the following once `ingest()` is implemented.
|
||||
// await this.#store.ingest(urls);
|
||||
}
|
||||
let interestVector = new lazy.InterestVector({
|
||||
animals: 0,
|
||||
arts: 0,
|
||||
autos: 0,
|
||||
business: 0,
|
||||
career: 0,
|
||||
education: 0,
|
||||
fashion: 0,
|
||||
finance: 0,
|
||||
food: 0,
|
||||
government: 0,
|
||||
hobbies: 0,
|
||||
home: 0,
|
||||
news: 0,
|
||||
realEstate: 0,
|
||||
society: 0,
|
||||
sports: 0,
|
||||
tech: 0,
|
||||
travel: 0,
|
||||
inconclusive: 0,
|
||||
});
|
||||
|
||||
/**
|
||||
* Exposed for testing.
|
||||
*/
|
||||
async _test_doClassificationHelper(urls) {
|
||||
await this.#doClassificationHelper(urls);
|
||||
if (
|
||||
lazy.NimbusFeatures.contentRelevancy.getVariable(
|
||||
NIMBUS_VARIABLE_INGEST_ENABLED
|
||||
) ??
|
||||
false
|
||||
) {
|
||||
interestVector = await this.#store.ingest(urls);
|
||||
}
|
||||
|
||||
return interestVector;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ add_task(async function test_NimbusIntegration_enable() {
|
|||
maxInputUrls: 3,
|
||||
// Set the timer interval to 0 will trigger the timer right away.
|
||||
timerInterval: 0,
|
||||
ingestEnabled: false,
|
||||
},
|
||||
});
|
||||
|
||||
|
|
@ -73,6 +74,7 @@ add_task(async function test_NimbusIntegration_disable() {
|
|||
maxInputUrls: 3,
|
||||
// Set the timer interval to 0 will trigger the timer right away.
|
||||
timerInterval: 0,
|
||||
ingestEnabled: false,
|
||||
},
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -100,24 +100,6 @@ add_task(async function test_call_disable_twice() {
|
|||
Services.prefs.clearUserPref(PREF_CONTENT_RELEVANCY_ENABLED);
|
||||
});
|
||||
|
||||
add_task(async function test_doClassificationHelper() {
|
||||
Services.prefs.setBoolPref(PREF_CONTENT_RELEVANCY_ENABLED, true);
|
||||
await TestUtils.waitForCondition(() => ContentRelevancyManager._isStoreReady);
|
||||
await ContentRelevancyManager._test_doClassificationHelper([]);
|
||||
|
||||
// Disable it to reset the store.
|
||||
Services.prefs.setBoolPref(PREF_CONTENT_RELEVANCY_ENABLED, false);
|
||||
await TestUtils.waitForTick();
|
||||
|
||||
await Assert.rejects(
|
||||
ContentRelevancyManager._test_doClassificationHelper([]),
|
||||
/Store is not available/,
|
||||
"Should throw with an unset store"
|
||||
);
|
||||
|
||||
Services.prefs.clearUserPref(PREF_CONTENT_RELEVANCY_ENABLED);
|
||||
});
|
||||
|
||||
/**
|
||||
* Sets up the update timer manager for testing: makes it fire more often,
|
||||
* removes all existing timers, and initializes it for testing. The body of this
|
||||
|
|
|
|||
|
|
@ -7,10 +7,6 @@ const { ContentRelevancyManager } = ChromeUtils.importESModule(
|
|||
"resource://gre/modules/ContentRelevancyManager.sys.mjs"
|
||||
);
|
||||
|
||||
const { TestUtils } = ChromeUtils.importESModule(
|
||||
"resource://testing-common/TestUtils.sys.mjs"
|
||||
);
|
||||
|
||||
const PREF_CONTENT_RELEVANCY_ENABLED = "toolkit.contentRelevancy.enabled";
|
||||
|
||||
add_setup(async function setup() {
|
||||
|
|
@ -83,39 +79,3 @@ add_task(async function test_classify_fail_case1() {
|
|||
"Should not record the duration"
|
||||
);
|
||||
});
|
||||
|
||||
/**
|
||||
* Test classification metrics - fail - store-not-ready.
|
||||
*/
|
||||
add_task(async function test_classify_fail_case2() {
|
||||
Services.fog.testResetFOG();
|
||||
|
||||
// Toggle the pref to disable the manager and nullify the store.
|
||||
Services.prefs.setBoolPref(PREF_CONTENT_RELEVANCY_ENABLED, false);
|
||||
await TestUtils.waitForTick();
|
||||
|
||||
await TestUtils.waitForCondition(
|
||||
() => !ContentRelevancyManager.shouldEnable,
|
||||
"Should be disabled via pref"
|
||||
);
|
||||
|
||||
Assert.equal(null, Glean.relevancyClassify.fail.testGetValue());
|
||||
Assert.equal(null, Glean.relevancyClassify.duration.testGetValue());
|
||||
|
||||
await ContentRelevancyManager._test_doClassification();
|
||||
|
||||
Assert.deepEqual(
|
||||
{
|
||||
reason: "store-not-ready",
|
||||
},
|
||||
Glean.relevancyClassify.fail.testGetValue()[0].extra,
|
||||
"Should record the fail event"
|
||||
);
|
||||
Assert.equal(
|
||||
null,
|
||||
Glean.relevancyClassify.duration.testGetValue(),
|
||||
"Should not record the duration"
|
||||
);
|
||||
|
||||
Services.prefs.setBoolPref(PREF_CONTENT_RELEVANCY_ENABLED, true);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -2701,6 +2701,10 @@ contentRelevancy:
|
|||
setPref:
|
||||
branch: user
|
||||
pref: toolkit.contentRelevancy.timerInterval
|
||||
ingestEnabled:
|
||||
description: Enable the ingestion through the Rust component
|
||||
type: boolean
|
||||
fallbackPref: toolkit.contentRelevancy.ingestEnabled
|
||||
|
||||
tabPreview:
|
||||
description: Prefs to control Tab Previews
|
||||
|
|
|
|||
|
|
@ -322,7 +322,7 @@ export class RelevancyStore {
|
|||
*/
|
||||
static init(dbpath) {
|
||||
const liftResult = (result) => FfiConverterTypeRelevancyStore.lift(result);
|
||||
const liftError = (data) => FfiConverterTypeRelevancyApiError.lift(data);
|
||||
const liftError = null;
|
||||
const functionCall = () => {
|
||||
try {
|
||||
FfiConverterString.checkType(dbpath)
|
||||
|
|
@ -368,7 +368,7 @@ export class RelevancyStore {
|
|||
}
|
||||
|
||||
ingest(topUrls) {
|
||||
const liftResult = (result) => undefined;
|
||||
const liftResult = (result) => FfiConverterTypeInterestVector.lift(result);
|
||||
const liftError = (data) => FfiConverterTypeRelevancyApiError.lift(data);
|
||||
const functionCall = () => {
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -915,7 +915,7 @@ export class FfiConverterTypeSuggestGlobalConfig extends FfiConverterArrayBuffer
|
|||
}
|
||||
|
||||
export class SuggestIngestionConstraints {
|
||||
constructor({ maxSuggestions = null, providers = null } = {}) {
|
||||
constructor({ maxSuggestions = null, providers = null, emptyOnly = false } = {}) {
|
||||
try {
|
||||
FfiConverterOptionalu64.checkType(maxSuggestions)
|
||||
} catch (e) {
|
||||
|
|
@ -932,13 +932,23 @@ export class SuggestIngestionConstraints {
|
|||
}
|
||||
throw e;
|
||||
}
|
||||
try {
|
||||
FfiConverterBool.checkType(emptyOnly)
|
||||
} catch (e) {
|
||||
if (e instanceof UniFFITypeError) {
|
||||
e.addItemDescriptionPart("emptyOnly");
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
this.maxSuggestions = maxSuggestions;
|
||||
this.providers = providers;
|
||||
this.emptyOnly = emptyOnly;
|
||||
}
|
||||
equals(other) {
|
||||
return (
|
||||
this.maxSuggestions == other.maxSuggestions &&
|
||||
this.providers == other.providers
|
||||
this.providers == other.providers &&
|
||||
this.emptyOnly == other.emptyOnly
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
@ -949,17 +959,20 @@ export class FfiConverterTypeSuggestIngestionConstraints extends FfiConverterArr
|
|||
return new SuggestIngestionConstraints({
|
||||
maxSuggestions: FfiConverterOptionalu64.read(dataStream),
|
||||
providers: FfiConverterOptionalSequenceTypeSuggestionProvider.read(dataStream),
|
||||
emptyOnly: FfiConverterBool.read(dataStream),
|
||||
});
|
||||
}
|
||||
static write(dataStream, value) {
|
||||
FfiConverterOptionalu64.write(dataStream, value.maxSuggestions);
|
||||
FfiConverterOptionalSequenceTypeSuggestionProvider.write(dataStream, value.providers);
|
||||
FfiConverterBool.write(dataStream, value.emptyOnly);
|
||||
}
|
||||
|
||||
static computeSize(value) {
|
||||
let totalSize = 0;
|
||||
totalSize += FfiConverterOptionalu64.computeSize(value.maxSuggestions);
|
||||
totalSize += FfiConverterOptionalSequenceTypeSuggestionProvider.computeSize(value.providers);
|
||||
totalSize += FfiConverterBool.computeSize(value.emptyOnly);
|
||||
return totalSize
|
||||
}
|
||||
|
||||
|
|
@ -984,6 +997,14 @@ export class FfiConverterTypeSuggestIngestionConstraints extends FfiConverterArr
|
|||
}
|
||||
throw e;
|
||||
}
|
||||
try {
|
||||
FfiConverterBool.checkType(value.emptyOnly);
|
||||
} catch (e) {
|
||||
if (e instanceof UniFFITypeError) {
|
||||
e.addItemDescriptionPart(".emptyOnly");
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ extern "C" {
|
|||
void * uniffi_relevancy_fn_constructor_relevancystore_new(RustBuffer, RustCallStatus*);
|
||||
RustBuffer uniffi_relevancy_fn_method_relevancystore_calculate_metrics(void *, RustCallStatus*);
|
||||
void uniffi_relevancy_fn_method_relevancystore_close(void *, RustCallStatus*);
|
||||
void uniffi_relevancy_fn_method_relevancystore_ingest(void *, RustBuffer, RustCallStatus*);
|
||||
RustBuffer uniffi_relevancy_fn_method_relevancystore_ingest(void *, RustBuffer, RustCallStatus*);
|
||||
void uniffi_relevancy_fn_method_relevancystore_interrupt(void *, RustCallStatus*);
|
||||
RustBuffer uniffi_relevancy_fn_method_relevancystore_user_interest_vector(void *, RustCallStatus*);
|
||||
void * uniffi_remote_settings_fn_clone_remotesettings(void *, RustCallStatus*);
|
||||
|
|
@ -144,7 +144,7 @@ Maybe<already_AddRefed<Promise>> UniFFICallAsync(const GlobalObject& aGlobal, ui
|
|||
return Some(CallHandler::CallAsync(uniffi_relevancy_fn_method_relevancystore_close, aGlobal, aArgs, "uniffi_relevancy_fn_method_relevancystore_close: "_ns, aError));
|
||||
}
|
||||
case 4: { // relevancy:uniffi_relevancy_fn_method_relevancystore_ingest
|
||||
using CallHandler = ScaffoldingCallHandler<ScaffoldingConverter<void>, ScaffoldingObjectConverter<&kRelevancyRelevancyStorePointerType>, ScaffoldingConverter<RustBuffer>>;
|
||||
using CallHandler = ScaffoldingCallHandler<ScaffoldingConverter<RustBuffer>, ScaffoldingObjectConverter<&kRelevancyRelevancyStorePointerType>, ScaffoldingConverter<RustBuffer>>;
|
||||
return Some(CallHandler::CallAsync(uniffi_relevancy_fn_method_relevancystore_ingest, aGlobal, aArgs, "uniffi_relevancy_fn_method_relevancystore_ingest: "_ns, aError));
|
||||
}
|
||||
case 5: { // relevancy:uniffi_relevancy_fn_method_relevancystore_interrupt
|
||||
|
|
@ -354,7 +354,7 @@ bool UniFFICallSync(const GlobalObject& aGlobal, uint64_t aId, const Sequence<Un
|
|||
return true;
|
||||
}
|
||||
case 4: { // relevancy:uniffi_relevancy_fn_method_relevancystore_ingest
|
||||
using CallHandler = ScaffoldingCallHandler<ScaffoldingConverter<void>, ScaffoldingObjectConverter<&kRelevancyRelevancyStorePointerType>, ScaffoldingConverter<RustBuffer>>;
|
||||
using CallHandler = ScaffoldingCallHandler<ScaffoldingConverter<RustBuffer>, ScaffoldingObjectConverter<&kRelevancyRelevancyStorePointerType>, ScaffoldingConverter<RustBuffer>>;
|
||||
CallHandler::CallSync(uniffi_relevancy_fn_method_relevancystore_ingest, aGlobal, aArgs, aReturnValue, "uniffi_relevancy_fn_method_relevancystore_ingest: "_ns, aError);
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue